diff --git a/third_party/stdlib/calendar.py b/third_party/stdlib/calendar.py new file mode 100644 index 00000000..4c95a493 --- /dev/null +++ b/third_party/stdlib/calendar.py @@ -0,0 +1,713 @@ +"""Calendar printing functions + +Note when comparing these calendars to the ones printed by cal(1): By +default, these calendars have Monday as the first day of the week, and +Sunday as the last (the European convention). Use setfirstweekday() to +set the first day of the week (0=Monday, 6=Sunday).""" + +import sys +import datetime +import locale as _locale + +__all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday", + "firstweekday", "isleap", "leapdays", "weekday", "monthrange", + "monthcalendar", "prmonth", "month", "prcal", "calendar", + "timegm", "month_name", "month_abbr", "day_name", "day_abbr"] + +# Exception raised for bad input (with string parameter for details) +error = ValueError + +# Exceptions raised for bad input +class IllegalMonthError(ValueError): + def __init__(self, month): + self.month = month + def __str__(self): + return "bad month number %r; must be 1-12" % self.month + + +class IllegalWeekdayError(ValueError): + def __init__(self, weekday): + self.weekday = weekday + def __str__(self): + return "bad weekday number %r; must be 0 (Monday) to 6 (Sunday)" % self.weekday + + +# Constants for months referenced later +January = 1 +February = 2 + +# Number of days per month (except for February in leap years) +mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + +# This module used to have hard-coded lists of day and month names, as +# English strings. The classes following emulate a read-only version of +# that, but supply localized names. Note that the values are computed +# fresh on each call, in case the user changes locale between calls. + +class _localized_month: + + _months = [datetime.date(2001, i+1, 1).strftime for i in range(12)] + _months.insert(0, lambda x: "") + + def __init__(self, format): + self.format = format + + def __getitem__(self, i): + funcs = self._months[i] + if isinstance(i, slice): + return [f(self.format) for f in funcs] + else: + return funcs(self.format) + + def __len__(self): + return 13 + + +class _localized_day: + + # January 1, 2001, was a Monday. + _days = [datetime.date(2001, 1, i+1).strftime for i in range(7)] + + def __init__(self, format): + self.format = format + + def __getitem__(self, i): + funcs = self._days[i] + if isinstance(i, slice): + return [f(self.format) for f in funcs] + else: + return funcs(self.format) + + def __len__(self): + return 7 + + +# Full and abbreviated names of weekdays +day_name = _localized_day('%A') +day_abbr = _localized_day('%a') + +# Full and abbreviated names of months (1-based arrays!!!) +month_name = _localized_month('%B') +month_abbr = _localized_month('%b') + +# Constants for weekdays +(MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY) = range(7) + + +def isleap(year): + """Return True for leap years, False for non-leap years.""" + return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0) + + +def leapdays(y1, y2): + """Return number of leap years in range [y1, y2). + Assume y1 <= y2.""" + y1 -= 1 + y2 -= 1 + return (y2//4 - y1//4) - (y2//100 - y1//100) + (y2//400 - y1//400) + + +def weekday(year, month, day): + """Return weekday (0-6 ~ Mon-Sun) for year (1970-...), month (1-12), + day (1-31).""" + return datetime.date(year, month, day).weekday() + + +def monthrange(year, month): + """Return weekday (0-6 ~ Mon-Sun) and number of days (28-31) for + year, month.""" + if not 1 <= month <= 12: + raise IllegalMonthError(month) + day1 = weekday(year, month, 1) + ndays = mdays[month] + (month == February and isleap(year)) + return day1, ndays + + +class Calendar(object): + """ + Base calendar class. This class doesn't do any formatting. It simply + provides data to subclasses. + """ + + def __init__(self, firstweekday=0): + self.firstweekday = firstweekday # 0 = Monday, 6 = Sunday + + def getfirstweekday(self): + return self._firstweekday % 7 + + def setfirstweekday(self, firstweekday): + self._firstweekday = firstweekday + + firstweekday = property(getfirstweekday, setfirstweekday) + + def iterweekdays(self): + """ + Return an iterator for one week of weekday numbers starting with the + configured first one. + """ + for i in range(self.firstweekday, self.firstweekday + 7): + yield i%7 + + def itermonthdates(self, year, month): + """ + Return an iterator for one month. The iterator will yield datetime.date + values and will always iterate through complete weeks, so it will yield + dates outside the specified month. + """ + date = datetime.date(year, month, 1) + # Go back to the beginning of the week + days = (date.weekday() - self.firstweekday) % 7 + date -= datetime.timedelta(days=days) + oneday = datetime.timedelta(days=1) + while True: + yield date + try: + date += oneday + except OverflowError: + # Adding one day could fail after datetime.MAXYEAR + break + if date.month != month and date.weekday() == self.firstweekday: + break + + def itermonthdays2(self, year, month): + """ + Like itermonthdates(), but will yield (day number, weekday number) + tuples. For days outside the specified month the day number is 0. + """ + for date in self.itermonthdates(year, month): + if date.month != month: + yield (0, date.weekday()) + else: + yield (date.day, date.weekday()) + + def itermonthdays(self, year, month): + """ + Like itermonthdates(), but will yield day numbers. For days outside + the specified month the day number is 0. + """ + for date in self.itermonthdates(year, month): + if date.month != month: + yield 0 + else: + yield date.day + + def monthdatescalendar(self, year, month): + """ + Return a matrix (list of lists) representing a month's calendar. + Each row represents a week; week entries are datetime.date values. + """ + dates = list(self.itermonthdates(year, month)) + return [ dates[i:i+7] for i in range(0, len(dates), 7) ] + + def monthdays2calendar(self, year, month): + """ + Return a matrix representing a month's calendar. + Each row represents a week; week entries are + (day number, weekday number) tuples. Day numbers outside this month + are zero. + """ + days = list(self.itermonthdays2(year, month)) + return [ days[i:i+7] for i in range(0, len(days), 7) ] + + def monthdayscalendar(self, year, month): + """ + Return a matrix representing a month's calendar. + Each row represents a week; days outside this month are zero. + """ + days = list(self.itermonthdays(year, month)) + return [ days[i:i+7] for i in range(0, len(days), 7) ] + + def yeardatescalendar(self, year, width=3): + """ + Return the data for the specified year ready for formatting. The return + value is a list of month rows. Each month row contains up to width months. + Each month contains between 4 and 6 weeks and each week contains 1-7 + days. Days are datetime.date objects. + """ + months = [ + self.monthdatescalendar(year, i) + for i in range(January, January+12) + ] + return [months[i:i+width] for i in range(0, len(months), width) ] + + def yeardays2calendar(self, year, width=3): + """ + Return the data for the specified year ready for formatting (similar to + yeardatescalendar()). Entries in the week lists are + (day number, weekday number) tuples. Day numbers outside this month are + zero. + """ + months = [ + self.monthdays2calendar(year, i) + for i in range(January, January+12) + ] + return [months[i:i+width] for i in range(0, len(months), width) ] + + def yeardayscalendar(self, year, width=3): + """ + Return the data for the specified year ready for formatting (similar to + yeardatescalendar()). Entries in the week lists are day numbers. + Day numbers outside this month are zero. + """ + months = [ + self.monthdayscalendar(year, i) + for i in range(January, January+12) + ] + return [months[i:i+width] for i in range(0, len(months), width) ] + + +class TextCalendar(Calendar): + """ + Subclass of Calendar that outputs a calendar as a simple plain text + similar to the UNIX program cal. + """ + + def prweek(self, theweek, width): + """ + Print a single week (no newline). + """ + print self.formatweek(theweek, width), + + def formatday(self, day, weekday, width): + """ + Returns a formatted day. + """ + if day == 0: + s = '' + else: + s = '%2i' % day # right-align single-digit days + return s.center(width) + + def formatweek(self, theweek, width): + """ + Returns a single week in a string (no newline). + """ + return ' '.join(self.formatday(d, wd, width) for (d, wd) in theweek) + + def formatweekday(self, day, width): + """ + Returns a formatted week day name. + """ + if width >= 9: + names = day_name + else: + names = day_abbr + return names[day][:width].center(width) + + def formatweekheader(self, width): + """ + Return a header for a week. + """ + return ' '.join(self.formatweekday(i, width) for i in self.iterweekdays()) + + def formatmonthname(self, theyear, themonth, width, withyear=True): + """ + Return a formatted month name. + """ + s = month_name[themonth] + if withyear: + s = "%s %r" % (s, theyear) + return s.center(width) + + def prmonth(self, theyear, themonth, w=0, l=0): + """ + Print a month's calendar. + """ + print self.formatmonth(theyear, themonth, w, l), + + def formatmonth(self, theyear, themonth, w=0, l=0): + """ + Return a month's calendar string (multi-line). + """ + w = max(2, w) + l = max(1, l) + s = self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1) + s = s.rstrip() + s += '\n' * l + s += self.formatweekheader(w).rstrip() + s += '\n' * l + for week in self.monthdays2calendar(theyear, themonth): + s += self.formatweek(week, w).rstrip() + s += '\n' * l + return s + + def formatyear(self, theyear, w=2, l=1, c=6, m=3): + """ + Returns a year's calendar as a multi-line string. + """ + w = max(2, w) + l = max(1, l) + c = max(2, c) + colwidth = (w + 1) * 7 - 1 + v = [] + a = v.append + a(repr(theyear).center(colwidth*m+c*(m-1)).rstrip()) + a('\n'*l) + header = self.formatweekheader(w) + for (i, row) in enumerate(self.yeardays2calendar(theyear, m)): + # months in this row + months = range(m*i+1, min(m*(i+1)+1, 13)) + a('\n'*l) + names = (self.formatmonthname(theyear, k, colwidth, False) + for k in months) + a(formatstring(names, colwidth, c).rstrip()) + a('\n'*l) + headers = (header for k in months) + a(formatstring(headers, colwidth, c).rstrip()) + a('\n'*l) + # max number of weeks for this row + height = max(len(cal) for cal in row) + for j in range(height): + weeks = [] + for cal in row: + if j >= len(cal): + weeks.append('') + else: + weeks.append(self.formatweek(cal[j], w)) + a(formatstring(weeks, colwidth, c).rstrip()) + a('\n' * l) + return ''.join(v) + + def pryear(self, theyear, w=0, l=0, c=6, m=3): + """Print a year's calendar.""" + print self.formatyear(theyear, w, l, c, m) + + +class HTMLCalendar(Calendar): + """ + This calendar returns complete HTML pages. + """ + + # CSS classes for the day s + cssclasses = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"] + + def formatday(self, day, weekday): + """ + Return a day as a table cell. + """ + if day == 0: + return ' ' # day outside month + else: + return '%d' % (self.cssclasses[weekday], day) + + def formatweek(self, theweek): + """ + Return a complete week as a table row. + """ + s = ''.join(self.formatday(d, wd) for (d, wd) in theweek) + return '%s' % s + + def formatweekday(self, day): + """ + Return a weekday name as a table header. + """ + return '%s' % (self.cssclasses[day], day_abbr[day]) + + def formatweekheader(self): + """ + Return a header for a week as a table row. + """ + s = ''.join(self.formatweekday(i) for i in self.iterweekdays()) + return '%s' % s + + def formatmonthname(self, theyear, themonth, withyear=True): + """ + Return a month name as a table row. + """ + if withyear: + s = '%s %s' % (month_name[themonth], theyear) + else: + s = '%s' % month_name[themonth] + return '%s' % s + + def formatmonth(self, theyear, themonth, withyear=True): + """ + Return a formatted month as a table. + """ + v = [] + a = v.append + a('') + a('\n') + a(self.formatmonthname(theyear, themonth, withyear=withyear)) + a('\n') + a(self.formatweekheader()) + a('\n') + for week in self.monthdays2calendar(theyear, themonth): + a(self.formatweek(week)) + a('\n') + a('
') + a('\n') + return ''.join(v) + + def formatyear(self, theyear, width=3): + """ + Return a formatted year as a table of tables. + """ + v = [] + a = v.append + width = max(width, 1) + a('') + a('\n') + a('' % (width, theyear)) + for i in range(January, January+12, width): + # months in this row + months = range(i, min(i+width, 13)) + a('') + for m in months: + a('') + a('') + a('
%s
') + a(self.formatmonth(theyear, m, withyear=False)) + a('
') + return ''.join(v) + + def formatyearpage(self, theyear, width=3, css='calendar.css', encoding=None): + """ + Return a formatted year as a complete HTML page. + """ + if encoding is None: + encoding = sys.getdefaultencoding() + v = [] + a = v.append + a('\n' % encoding) + a('\n') + a('\n') + a('\n') + a('\n' % encoding) + if css is not None: + a('\n' % css) + a('Calendar for %d\n' % theyear) + a('\n') + a('\n') + a(self.formatyear(theyear, width)) + a('\n') + a('\n') + return ''.join(v).encode(encoding, "xmlcharrefreplace") + + +class TimeEncoding: + def __init__(self, locale): + self.locale = locale + + def __enter__(self): + self.oldlocale = _locale.getlocale(_locale.LC_TIME) + _locale.setlocale(_locale.LC_TIME, self.locale) + return _locale.getlocale(_locale.LC_TIME)[1] + + def __exit__(self, *args): + _locale.setlocale(_locale.LC_TIME, self.oldlocale) + + +class LocaleTextCalendar(TextCalendar): + """ + This class can be passed a locale name in the constructor and will return + month and weekday names in the specified locale. If this locale includes + an encoding all strings containing month and weekday names will be returned + as unicode. + """ + + def __init__(self, firstweekday=0, locale=None): + TextCalendar.__init__(self, firstweekday) + if locale is None: + locale = _locale.getdefaultlocale() + self.locale = locale + + def formatweekday(self, day, width): + with TimeEncoding(self.locale) as encoding: + if width >= 9: + names = day_name + else: + names = day_abbr + name = names[day] + if encoding is not None: + name = name.decode(encoding) + return name[:width].center(width) + + def formatmonthname(self, theyear, themonth, width, withyear=True): + with TimeEncoding(self.locale) as encoding: + s = month_name[themonth] + if encoding is not None: + s = s.decode(encoding) + if withyear: + s = "%s %r" % (s, theyear) + return s.center(width) + + +class LocaleHTMLCalendar(HTMLCalendar): + """ + This class can be passed a locale name in the constructor and will return + month and weekday names in the specified locale. If this locale includes + an encoding all strings containing month and weekday names will be returned + as unicode. + """ + def __init__(self, firstweekday=0, locale=None): + HTMLCalendar.__init__(self, firstweekday) + if locale is None: + locale = _locale.getdefaultlocale() + self.locale = locale + + def formatweekday(self, day): + with TimeEncoding(self.locale) as encoding: + s = day_abbr[day] + if encoding is not None: + s = s.decode(encoding) + return '%s' % (self.cssclasses[day], s) + + def formatmonthname(self, theyear, themonth, withyear=True): + with TimeEncoding(self.locale) as encoding: + s = month_name[themonth] + if encoding is not None: + s = s.decode(encoding) + if withyear: + s = '%s %s' % (s, theyear) + return '%s' % s + + +# Support for old module level interface +c = TextCalendar() + +firstweekday = c.getfirstweekday + +def setfirstweekday(firstweekday): + try: + firstweekday.__index__ + except AttributeError: + raise IllegalWeekdayError(firstweekday) + if not MONDAY <= firstweekday <= SUNDAY: + raise IllegalWeekdayError(firstweekday) + c.firstweekday = firstweekday + +monthcalendar = c.monthdayscalendar +prweek = c.prweek +week = c.formatweek +weekheader = c.formatweekheader +prmonth = c.prmonth +month = c.formatmonth +calendar = c.formatyear +prcal = c.pryear + + +# Spacing of month columns for multi-column year calendar +_colwidth = 7*3 - 1 # Amount printed by prweek() +_spacing = 6 # Number of spaces between columns + + +def format(cols, colwidth=_colwidth, spacing=_spacing): + """Prints multi-column formatting for year calendars""" + print formatstring(cols, colwidth, spacing) + + +def formatstring(cols, colwidth=_colwidth, spacing=_spacing): + """Returns a string formatted from n strings, centered within n columns.""" + spacing *= ' ' + return spacing.join(c.center(colwidth) for c in cols) + + +EPOCH = 1970 +_EPOCH_ORD = datetime.date(EPOCH, 1, 1).toordinal() + + +def timegm(tuple): + """Unrelated but handy function to calculate Unix timestamp from GMT.""" + year, month, day, hour, minute, second = tuple[:6] + days = datetime.date(year, month, 1).toordinal() - _EPOCH_ORD + day - 1 + hours = days*24 + hour + minutes = hours*60 + minute + seconds = minutes*60 + second + return seconds + + +def main(args): + import optparse + parser = optparse.OptionParser(usage="usage: %prog [options] [year [month]]") + parser.add_option( + "-w", "--width", + dest="width", type="int", default=2, + help="width of date column (default 2, text only)" + ) + parser.add_option( + "-l", "--lines", + dest="lines", type="int", default=1, + help="number of lines for each week (default 1, text only)" + ) + parser.add_option( + "-s", "--spacing", + dest="spacing", type="int", default=6, + help="spacing between months (default 6, text only)" + ) + parser.add_option( + "-m", "--months", + dest="months", type="int", default=3, + help="months per row (default 3, text only)" + ) + parser.add_option( + "-c", "--css", + dest="css", default="calendar.css", + help="CSS to use for page (html only)" + ) + parser.add_option( + "-L", "--locale", + dest="locale", default=None, + help="locale to be used from month and weekday names" + ) + parser.add_option( + "-e", "--encoding", + dest="encoding", default=None, + help="Encoding to use for output" + ) + parser.add_option( + "-t", "--type", + dest="type", default="text", + choices=("text", "html"), + help="output type (text or html)" + ) + + (options, args) = parser.parse_args(args) + + if options.locale and not options.encoding: + parser.error("if --locale is specified --encoding is required") + sys.exit(1) + + locale = options.locale, options.encoding + + if options.type == "html": + if options.locale: + cal = LocaleHTMLCalendar(locale=locale) + else: + cal = HTMLCalendar() + encoding = options.encoding + if encoding is None: + encoding = sys.getdefaultencoding() + optdict = dict(encoding=encoding, css=options.css) + if len(args) == 1: + print cal.formatyearpage(datetime.date.today().year, **optdict) + elif len(args) == 2: + print cal.formatyearpage(int(args[1]), **optdict) + else: + parser.error("incorrect number of arguments") + sys.exit(1) + else: + if options.locale: + cal = LocaleTextCalendar(locale=locale) + else: + cal = TextCalendar() + optdict = dict(w=options.width, l=options.lines) + if len(args) != 3: + optdict["c"] = options.spacing + optdict["m"] = options.months + if len(args) == 1: + result = cal.formatyear(datetime.date.today().year, **optdict) + elif len(args) == 2: + result = cal.formatyear(int(args[1]), **optdict) + elif len(args) == 3: + result = cal.formatmonth(int(args[1]), int(args[2]), **optdict) + else: + parser.error("incorrect number of arguments") + sys.exit(1) + if options.encoding: + result = result.encode(options.encoding) + print result + + +if __name__ == "__main__": + main(sys.argv) diff --git a/third_party/stdlib/cgi.py b/third_party/stdlib/cgi.py new file mode 100755 index 00000000..7c51b44d --- /dev/null +++ b/third_party/stdlib/cgi.py @@ -0,0 +1,1059 @@ +#! /usr/local/bin/python + +# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is +# intentionally NOT "/usr/bin/env python". On many systems +# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI +# scripts, and /usr/local/bin is the default directory where Python is +# installed, so /usr/bin/env would be unable to find python. Granted, +# binary installations by Linux vendors often install Python in +# /usr/bin. So let those vendors patch cgi.py to match their choice +# of installation. + +"""Support module for CGI (Common Gateway Interface) scripts. + +This module defines a number of utilities for use by CGI scripts +written in Python. +""" + +# XXX Perhaps there should be a slimmed version that doesn't contain +# all those backwards compatible and debugging classes and functions? + +# History +# ------- +# +# Michael McLay started this module. Steve Majewski changed the +# interface to SvFormContentDict and FormContentDict. The multipart +# parsing was inspired by code submitted by Andreas Paepcke. Guido van +# Rossum rewrote, reformatted and documented the module and is currently +# responsible for its maintenance. +# + +__version__ = "2.6" + + +# Imports +# ======= + +from operator import attrgetter +import sys +import os +import UserDict +import urlparse + +from warnings import filterwarnings, catch_warnings, warn +with catch_warnings(): + if sys.py3kwarning: + filterwarnings("ignore", ".*mimetools has been removed", + DeprecationWarning) + filterwarnings("ignore", ".*rfc822 has been removed", + DeprecationWarning) + import mimetools + import rfc822 + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +__all__ = ["MiniFieldStorage", "FieldStorage", "FormContentDict", + "SvFormContentDict", "InterpFormContentDict", "FormContent", + "parse", "parse_qs", "parse_qsl", "parse_multipart", + "parse_header", "print_exception", "print_environ", + "print_form", "print_directory", "print_arguments", + "print_environ_usage", "escape"] + +# Logging support +# =============== + +logfile = "" # Filename to log to, if not empty +logfp = None # File object to log to, if not None + +def initlog(*allargs): + """Write a log message, if there is a log file. + + Even though this function is called initlog(), you should always + use log(); log is a variable that is set either to initlog + (initially), to dolog (once the log file has been opened), or to + nolog (when logging is disabled). + + The first argument is a format string; the remaining arguments (if + any) are arguments to the % operator, so e.g. + log("%s: %s", "a", "b") + will write "a: b" to the log file, followed by a newline. + + If the global logfp is not None, it should be a file object to + which log data is written. + + If the global logfp is None, the global logfile may be a string + giving a filename to open, in append mode. This file should be + world writable!!! If the file can't be opened, logging is + silently disabled (since there is no safe place where we could + send an error message). + + """ + global logfp, log + if logfile and not logfp: + try: + logfp = open(logfile, "a") + except IOError: + pass + if not logfp: + log = nolog + else: + log = dolog + log(*allargs) + +def dolog(fmt, *args): + """Write a log message to the log file. See initlog() for docs.""" + logfp.write(fmt%args + "\n") + +def nolog(*allargs): + """Dummy function, assigned to log when logging is disabled.""" + pass + +log = initlog # The current logging function + + +# Parsing functions +# ================= + +# Maximum input we will accept when REQUEST_METHOD is POST +# 0 ==> unlimited input +maxlen = 0 + +def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): + """Parse a query in the environment or from a file (default stdin) + + Arguments, all optional: + + fp : file pointer; default: sys.stdin + + environ : environment dictionary; default: os.environ + + keep_blank_values: flag indicating whether blank values in + percent-encoded forms should be treated as blank strings. + A true value indicates that blanks should be retained as + blank strings. The default false value indicates that + blank values are to be ignored and treated as if they were + not included. + + strict_parsing: flag indicating what to do with parsing errors. + If false (the default), errors are silently ignored. + If true, errors raise a ValueError exception. + """ + if fp is None: + fp = sys.stdin + if not 'REQUEST_METHOD' in environ: + environ['REQUEST_METHOD'] = 'GET' # For testing stand-alone + if environ['REQUEST_METHOD'] == 'POST': + ctype, pdict = parse_header(environ['CONTENT_TYPE']) + if ctype == 'multipart/form-data': + return parse_multipart(fp, pdict) + elif ctype == 'application/x-www-form-urlencoded': + clength = int(environ['CONTENT_LENGTH']) + if maxlen and clength > maxlen: + raise ValueError, 'Maximum content length exceeded' + qs = fp.read(clength) + else: + qs = '' # Unknown content-type + if 'QUERY_STRING' in environ: + if qs: qs = qs + '&' + qs = qs + environ['QUERY_STRING'] + elif sys.argv[1:]: + if qs: qs = qs + '&' + qs = qs + sys.argv[1] + environ['QUERY_STRING'] = qs # XXX Shouldn't, really + elif 'QUERY_STRING' in environ: + qs = environ['QUERY_STRING'] + else: + if sys.argv[1:]: + qs = sys.argv[1] + else: + qs = "" + environ['QUERY_STRING'] = qs # XXX Shouldn't, really + return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) + + +# parse query string function called from urlparse, +# this is done in order to maintain backward compatibility. + +def parse_qs(qs, keep_blank_values=0, strict_parsing=0): + """Parse a query given as a string argument.""" + warn("cgi.parse_qs is deprecated, use urlparse.parse_qs instead", + PendingDeprecationWarning, 2) + return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) + + +def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): + """Parse a query given as a string argument.""" + warn("cgi.parse_qsl is deprecated, use urlparse.parse_qsl instead", + PendingDeprecationWarning, 2) + return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing) + +def parse_multipart(fp, pdict): + """Parse multipart input. + + Arguments: + fp : input file + pdict: dictionary containing other parameters of content-type header + + Returns a dictionary just like parse_qs(): keys are the field names, each + value is a list of values for that field. This is easy to use but not + much good if you are expecting megabytes to be uploaded -- in that case, + use the FieldStorage class instead which is much more flexible. Note + that content-type is the raw, unparsed contents of the content-type + header. + + XXX This does not parse nested multipart parts -- use FieldStorage for + that. + + XXX This should really be subsumed by FieldStorage altogether -- no + point in having two implementations of the same parsing algorithm. + Also, FieldStorage protects itself better against certain DoS attacks + by limiting the size of the data read in one chunk. The API here + does not support that kind of protection. This also affects parse() + since it can call parse_multipart(). + + """ + boundary = "" + if 'boundary' in pdict: + boundary = pdict['boundary'] + if not valid_boundary(boundary): + raise ValueError, ('Invalid boundary in multipart form: %r' + % (boundary,)) + + nextpart = "--" + boundary + lastpart = "--" + boundary + "--" + partdict = {} + terminator = "" + + while terminator != lastpart: + bytes = -1 + data = None + if terminator: + # At start of next part. Read headers first. + headers = mimetools.Message(fp) + clength = headers.getheader('content-length') + if clength: + try: + bytes = int(clength) + except ValueError: + pass + if bytes > 0: + if maxlen and bytes > maxlen: + raise ValueError, 'Maximum content length exceeded' + data = fp.read(bytes) + else: + data = "" + # Read lines until end of part. + lines = [] + while 1: + line = fp.readline() + if not line: + terminator = lastpart # End outer loop + break + if line[:2] == "--": + terminator = line.strip() + if terminator in (nextpart, lastpart): + break + lines.append(line) + # Done with part. + if data is None: + continue + if bytes < 0: + if lines: + # Strip final line terminator + line = lines[-1] + if line[-2:] == "\r\n": + line = line[:-2] + elif line[-1:] == "\n": + line = line[:-1] + lines[-1] = line + data = "".join(lines) + line = headers['content-disposition'] + if not line: + continue + key, params = parse_header(line) + if key != 'form-data': + continue + if 'name' in params: + name = params['name'] + else: + continue + if name in partdict: + partdict[name].append(data) + else: + partdict[name] = [data] + + return partdict + + +def _parseparam(s): + while s[:1] == ';': + s = s[1:] + end = s.find(';') + while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: + end = s.find(';', end + 1) + if end < 0: + end = len(s) + f = s[:end] + yield f.strip() + s = s[end:] + +def parse_header(line): + """Parse a Content-type like header. + + Return the main content-type and a dictionary of options. + + """ + parts = _parseparam(';' + line) + key = parts.next() + pdict = {} + for p in parts: + i = p.find('=') + if i >= 0: + name = p[:i].strip().lower() + value = p[i+1:].strip() + if len(value) >= 2 and value[0] == value[-1] == '"': + value = value[1:-1] + value = value.replace('\\\\', '\\').replace('\\"', '"') + pdict[name] = value + return key, pdict + + +# Classes for field storage +# ========================= + +class MiniFieldStorage: + + """Like FieldStorage, for use when no file uploads are possible.""" + + # Dummy attributes + filename = None + list = None + type = None + file = None + type_options = {} + disposition = None + disposition_options = {} + headers = {} + + def __init__(self, name, value): + """Constructor from field name and value.""" + self.name = name + self.value = value + # self.file = StringIO(value) + + def __repr__(self): + """Return printable representation.""" + return "MiniFieldStorage(%r, %r)" % (self.name, self.value) + + +class FieldStorage: + + """Store a sequence of fields, reading multipart/form-data. + + This class provides naming, typing, files stored on disk, and + more. At the top level, it is accessible like a dictionary, whose + keys are the field names. (Note: None can occur as a field name.) + The items are either a Python list (if there's multiple values) or + another FieldStorage or MiniFieldStorage object. If it's a single + object, it has the following attributes: + + name: the field name, if specified; otherwise None + + filename: the filename, if specified; otherwise None; this is the + client side filename, *not* the file name on which it is + stored (that's a temporary file you don't deal with) + + value: the value as a *string*; for file uploads, this + transparently reads the file every time you request the value + + file: the file(-like) object from which you can read the data; + None if the data is stored a simple string + + type: the content-type, or None if not specified + + type_options: dictionary of options specified on the content-type + line + + disposition: content-disposition, or None if not specified + + disposition_options: dictionary of corresponding options + + headers: a dictionary(-like) object (sometimes rfc822.Message or a + subclass thereof) containing *all* headers + + The class is subclassable, mostly for the purpose of overriding + the make_file() method, which is called internally to come up with + a file open for reading and writing. This makes it possible to + override the default choice of storing all files in a temporary + directory and unlinking them as soon as they have been opened. + + """ + + def __init__(self, fp=None, headers=None, outerboundary="", + environ=os.environ, keep_blank_values=0, strict_parsing=0): + """Constructor. Read multipart/* until last part. + + Arguments, all optional: + + fp : file pointer; default: sys.stdin + (not used when the request method is GET) + + headers : header dictionary-like object; default: + taken from environ as per CGI spec + + outerboundary : terminating multipart boundary + (for internal use only) + + environ : environment dictionary; default: os.environ + + keep_blank_values: flag indicating whether blank values in + percent-encoded forms should be treated as blank strings. + A true value indicates that blanks should be retained as + blank strings. The default false value indicates that + blank values are to be ignored and treated as if they were + not included. + + strict_parsing: flag indicating what to do with parsing errors. + If false (the default), errors are silently ignored. + If true, errors raise a ValueError exception. + + """ + method = 'GET' + self.keep_blank_values = keep_blank_values + self.strict_parsing = strict_parsing + if 'REQUEST_METHOD' in environ: + method = environ['REQUEST_METHOD'].upper() + self.qs_on_post = None + if method == 'GET' or method == 'HEAD': + if 'QUERY_STRING' in environ: + qs = environ['QUERY_STRING'] + elif sys.argv[1:]: + qs = sys.argv[1] + else: + qs = "" + fp = StringIO(qs) + if headers is None: + headers = {'content-type': + "application/x-www-form-urlencoded"} + if headers is None: + headers = {} + if method == 'POST': + # Set default content-type for POST to what's traditional + headers['content-type'] = "application/x-www-form-urlencoded" + if 'CONTENT_TYPE' in environ: + headers['content-type'] = environ['CONTENT_TYPE'] + if 'QUERY_STRING' in environ: + self.qs_on_post = environ['QUERY_STRING'] + if 'CONTENT_LENGTH' in environ: + headers['content-length'] = environ['CONTENT_LENGTH'] + self.fp = fp or sys.stdin + self.headers = headers + self.outerboundary = outerboundary + + # Process content-disposition header + cdisp, pdict = "", {} + if 'content-disposition' in self.headers: + cdisp, pdict = parse_header(self.headers['content-disposition']) + self.disposition = cdisp + self.disposition_options = pdict + self.name = None + if 'name' in pdict: + self.name = pdict['name'] + self.filename = None + if 'filename' in pdict: + self.filename = pdict['filename'] + + # Process content-type header + # + # Honor any existing content-type header. But if there is no + # content-type header, use some sensible defaults. Assume + # outerboundary is "" at the outer level, but something non-false + # inside a multi-part. The default for an inner part is text/plain, + # but for an outer part it should be urlencoded. This should catch + # bogus clients which erroneously forget to include a content-type + # header. + # + # See below for what we do if there does exist a content-type header, + # but it happens to be something we don't understand. + if 'content-type' in self.headers: + ctype, pdict = parse_header(self.headers['content-type']) + elif self.outerboundary or method != 'POST': + ctype, pdict = "text/plain", {} + else: + ctype, pdict = 'application/x-www-form-urlencoded', {} + self.type = ctype + self.type_options = pdict + self.innerboundary = "" + if 'boundary' in pdict: + self.innerboundary = pdict['boundary'] + clen = -1 + if 'content-length' in self.headers: + try: + clen = int(self.headers['content-length']) + except ValueError: + pass + if maxlen and clen > maxlen: + raise ValueError, 'Maximum content length exceeded' + self.length = clen + + self.list = self.file = None + self.done = 0 + if ctype == 'application/x-www-form-urlencoded': + self.read_urlencoded() + elif ctype[:10] == 'multipart/': + self.read_multi(environ, keep_blank_values, strict_parsing) + else: + self.read_single() + + def __repr__(self): + """Return a printable representation.""" + return "FieldStorage(%r, %r, %r)" % ( + self.name, self.filename, self.value) + + def __iter__(self): + return iter(self.keys()) + + def __getattr__(self, name): + if name != 'value': + raise AttributeError, name + if self.file: + self.file.seek(0) + value = self.file.read() + self.file.seek(0) + elif self.list is not None: + value = self.list + else: + value = None + return value + + def __getitem__(self, key): + """Dictionary style indexing.""" + if self.list is None: + raise TypeError, "not indexable" + found = [] + for item in self.list: + if item.name == key: found.append(item) + if not found: + raise KeyError, key + if len(found) == 1: + return found[0] + else: + return found + + def getvalue(self, key, default=None): + """Dictionary style get() method, including 'value' lookup.""" + if key in self: + value = self[key] + if type(value) is type([]): + return map(attrgetter('value'), value) + else: + return value.value + else: + return default + + def getfirst(self, key, default=None): + """ Return the first value received.""" + if key in self: + value = self[key] + if type(value) is type([]): + return value[0].value + else: + return value.value + else: + return default + + def getlist(self, key): + """ Return list of received values.""" + if key in self: + value = self[key] + if type(value) is type([]): + return map(attrgetter('value'), value) + else: + return [value.value] + else: + return [] + + def keys(self): + """Dictionary style keys() method.""" + if self.list is None: + raise TypeError, "not indexable" + return list(set(item.name for item in self.list)) + + def has_key(self, key): + """Dictionary style has_key() method.""" + if self.list is None: + raise TypeError, "not indexable" + return any(item.name == key for item in self.list) + + def __contains__(self, key): + """Dictionary style __contains__ method.""" + if self.list is None: + raise TypeError, "not indexable" + return any(item.name == key for item in self.list) + + def __len__(self): + """Dictionary style len(x) support.""" + return len(self.keys()) + + def __nonzero__(self): + return bool(self.list) + + def read_urlencoded(self): + """Internal: read data in query string format.""" + qs = self.fp.read(self.length) + if self.qs_on_post: + qs += '&' + self.qs_on_post + self.list = list = [] + for key, value in urlparse.parse_qsl(qs, self.keep_blank_values, + self.strict_parsing): + list.append(MiniFieldStorage(key, value)) + self.skip_lines() + + FieldStorageClass = None + + def read_multi(self, environ, keep_blank_values, strict_parsing): + """Internal: read a part that is itself multipart.""" + ib = self.innerboundary + if not valid_boundary(ib): + raise ValueError, 'Invalid boundary in multipart form: %r' % (ib,) + self.list = [] + if self.qs_on_post: + for key, value in urlparse.parse_qsl(self.qs_on_post, + self.keep_blank_values, self.strict_parsing): + self.list.append(MiniFieldStorage(key, value)) + FieldStorageClass = None + + klass = self.FieldStorageClass or self.__class__ + part = klass(self.fp, {}, ib, + environ, keep_blank_values, strict_parsing) + # Throw first part away + while not part.done: + headers = rfc822.Message(self.fp) + part = klass(self.fp, headers, ib, + environ, keep_blank_values, strict_parsing) + self.list.append(part) + self.skip_lines() + + def read_single(self): + """Internal: read an atomic part.""" + if self.length >= 0: + self.read_binary() + self.skip_lines() + else: + self.read_lines() + self.file.seek(0) + + bufsize = 8*1024 # I/O buffering size for copy to file + + def read_binary(self): + """Internal: read binary data.""" + self.file = self.make_file('b') + todo = self.length + if todo >= 0: + while todo > 0: + data = self.fp.read(min(todo, self.bufsize)) + if not data: + self.done = -1 + break + self.file.write(data) + todo = todo - len(data) + + def read_lines(self): + """Internal: read lines until EOF or outerboundary.""" + self.file = self.__file = StringIO() + if self.outerboundary: + self.read_lines_to_outerboundary() + else: + self.read_lines_to_eof() + + def __write(self, line): + if self.__file is not None: + if self.__file.tell() + len(line) > 1000: + self.file = self.make_file('') + self.file.write(self.__file.getvalue()) + self.__file = None + self.file.write(line) + + def read_lines_to_eof(self): + """Internal: read lines until EOF.""" + while 1: + line = self.fp.readline(1<<16) + if not line: + self.done = -1 + break + self.__write(line) + + def read_lines_to_outerboundary(self): + """Internal: read lines until outerboundary.""" + next = "--" + self.outerboundary + last = next + "--" + delim = "" + last_line_lfend = True + while 1: + line = self.fp.readline(1<<16) + if not line: + self.done = -1 + break + if delim == "\r": + line = delim + line + delim = "" + if line[:2] == "--" and last_line_lfend: + strippedline = line.strip() + if strippedline == next: + break + if strippedline == last: + self.done = 1 + break + odelim = delim + if line[-2:] == "\r\n": + delim = "\r\n" + line = line[:-2] + last_line_lfend = True + elif line[-1] == "\n": + delim = "\n" + line = line[:-1] + last_line_lfend = True + elif line[-1] == "\r": + # We may interrupt \r\n sequences if they span the 2**16 + # byte boundary + delim = "\r" + line = line[:-1] + last_line_lfend = False + else: + delim = "" + last_line_lfend = False + self.__write(odelim + line) + + def skip_lines(self): + """Internal: skip lines until outer boundary if defined.""" + if not self.outerboundary or self.done: + return + next = "--" + self.outerboundary + last = next + "--" + last_line_lfend = True + while 1: + line = self.fp.readline(1<<16) + if not line: + self.done = -1 + break + if line[:2] == "--" and last_line_lfend: + strippedline = line.strip() + if strippedline == next: + break + if strippedline == last: + self.done = 1 + break + last_line_lfend = line.endswith('\n') + + def make_file(self, binary=None): + """Overridable: return a readable & writable file. + + The file will be used as follows: + - data is written to it + - seek(0) + - data is read from it + + The 'binary' argument is unused -- the file is always opened + in binary mode. + + This version opens a temporary file for reading and writing, + and immediately deletes (unlinks) it. The trick (on Unix!) is + that the file can still be used, but it can't be opened by + another process, and it will automatically be deleted when it + is closed or when the current process terminates. + + If you want a more permanent file, you derive a class which + overrides this method. If you want a visible temporary file + that is nevertheless automatically deleted when the script + terminates, try defining a __del__ method in a derived class + which unlinks the temporary files you have created. + + """ + import tempfile + return tempfile.TemporaryFile("w+b") + + + +# Backwards Compatibility Classes +# =============================== + +class FormContentDict(UserDict.UserDict): + """Form content as dictionary with a list of values per field. + + form = FormContentDict() + + form[key] -> [value, value, ...] + key in form -> Boolean + form.keys() -> [key, key, ...] + form.values() -> [[val, val, ...], [val, val, ...], ...] + form.items() -> [(key, [val, val, ...]), (key, [val, val, ...]), ...] + form.dict == {key: [val, val, ...], ...} + + """ + def __init__(self, environ=os.environ, keep_blank_values=0, strict_parsing=0): + self.dict = self.data = parse(environ=environ, + keep_blank_values=keep_blank_values, + strict_parsing=strict_parsing) + self.query_string = environ['QUERY_STRING'] + + +class SvFormContentDict(FormContentDict): + """Form content as dictionary expecting a single value per field. + + If you only expect a single value for each field, then form[key] + will return that single value. It will raise an IndexError if + that expectation is not true. If you expect a field to have + possible multiple values, than you can use form.getlist(key) to + get all of the values. values() and items() are a compromise: + they return single strings where there is a single value, and + lists of strings otherwise. + + """ + def __getitem__(self, key): + if len(self.dict[key]) > 1: + raise IndexError, 'expecting a single value' + return self.dict[key][0] + def getlist(self, key): + return self.dict[key] + def values(self): + result = [] + for value in self.dict.values(): + if len(value) == 1: + result.append(value[0]) + else: result.append(value) + return result + def items(self): + result = [] + for key, value in self.dict.items(): + if len(value) == 1: + result.append((key, value[0])) + else: result.append((key, value)) + return result + + +class InterpFormContentDict(SvFormContentDict): + """This class is present for backwards compatibility only.""" + def __getitem__(self, key): + v = SvFormContentDict.__getitem__(self, key) + if v[0] in '0123456789+-.': + try: return int(v) + except ValueError: + try: return float(v) + except ValueError: pass + return v.strip() + def values(self): + result = [] + for key in self.keys(): + try: + result.append(self[key]) + except IndexError: + result.append(self.dict[key]) + return result + def items(self): + result = [] + for key in self.keys(): + try: + result.append((key, self[key])) + except IndexError: + result.append((key, self.dict[key])) + return result + + +class FormContent(FormContentDict): + """This class is present for backwards compatibility only.""" + def values(self, key): + if key in self.dict :return self.dict[key] + else: return None + def indexed_value(self, key, location): + if key in self.dict: + if len(self.dict[key]) > location: + return self.dict[key][location] + else: return None + else: return None + def value(self, key): + if key in self.dict: return self.dict[key][0] + else: return None + def length(self, key): + return len(self.dict[key]) + def stripped(self, key): + if key in self.dict: return self.dict[key][0].strip() + else: return None + def pars(self): + return self.dict + + +# Test/debug code +# =============== + +def test(environ=os.environ): + """Robust test CGI script, usable as main program. + + Write minimal HTTP headers and dump all information provided to + the script in HTML form. + + """ + print "Content-type: text/html" + print + sys.stderr = sys.stdout + try: + form = FieldStorage() # Replace with other classes to test those + print_directory() + print_arguments() + print_form(form) + print_environ(environ) + print_environ_usage() + def f(): + exec "testing print_exception() -- italics?" + def g(f=f): + f() + print "

What follows is a test, not an actual exception:

" + g() + except: + print_exception() + + print "

Second try with a small maxlen...

" + + global maxlen + maxlen = 50 + try: + form = FieldStorage() # Replace with other classes to test those + print_directory() + print_arguments() + print_form(form) + print_environ(environ) + except: + print_exception() + +def print_exception(type=None, value=None, tb=None, limit=None): + if type is None: + type, value, tb = sys.exc_info() + import traceback + print + print "

Traceback (most recent call last):

" + list = traceback.format_tb(tb, limit) + \ + traceback.format_exception_only(type, value) + print "
%s%s
" % ( + escape("".join(list[:-1])), + escape(list[-1]), + ) + del tb + +def print_environ(environ=os.environ): + """Dump the shell environment as HTML.""" + keys = environ.keys() + keys.sort() + print + print "

Shell Environment:

" + print "
" + for key in keys: + print "
", escape(key), "
", escape(environ[key]) + print "
" + print + +def print_form(form): + """Dump the contents of a form as HTML.""" + keys = form.keys() + keys.sort() + print + print "

Form Contents:

" + if not keys: + print "

No form fields." + print "

" + for key in keys: + print "
" + escape(key) + ":", + value = form[key] + print "" + escape(repr(type(value))) + "" + print "
" + escape(repr(value)) + print "
" + print + +def print_directory(): + """Dump the current directory as HTML.""" + print + print "

Current Working Directory:

" + try: + pwd = os.getcwd() + except os.error, msg: + print "os.error:", escape(str(msg)) + else: + print escape(pwd) + print + +def print_arguments(): + print + print "

Command Line Arguments:

" + print + print sys.argv + print + +def print_environ_usage(): + """Dump a list of environment variables used by CGI as HTML.""" + print """ +

These environment variables could have been set:

+ +In addition, HTTP headers sent by the server may be passed in the +environment as well. Here are some common variable names: + +""" + + +# Utilities +# ========= + +def escape(s, quote=None): + '''Replace special characters "&", "<" and ">" to HTML-safe sequences. + If the optional flag quote is true, the quotation mark character (") + is also translated.''' + s = s.replace("&", "&") # Must be done first! + s = s.replace("<", "<") + s = s.replace(">", ">") + if quote: + s = s.replace('"', """) + return s + +def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"): + import re + return re.match(_vb_pattern, s) + +# Invoke mainline +# =============== + +# Call test() when this file is run as a script (not imported as a module) +if __name__ == '__main__': + test() diff --git a/third_party/stdlib/cmd.py b/third_party/stdlib/cmd.py new file mode 100644 index 00000000..628dc2ac --- /dev/null +++ b/third_party/stdlib/cmd.py @@ -0,0 +1,403 @@ +"""A generic class to build line-oriented command interpreters. + +Interpreters constructed with this class obey the following conventions: + +1. End of file on input is processed as the command 'EOF'. +2. A command is parsed out of each line by collecting the prefix composed + of characters in the identchars member. +3. A command `foo' is dispatched to a method 'do_foo()'; the do_ method + is passed a single argument consisting of the remainder of the line. +4. Typing an empty line repeats the last command. (Actually, it calls the + method `emptyline', which may be overridden in a subclass.) +5. There is a predefined `help' method. Given an argument `topic', it + calls the command `help_topic'. With no arguments, it lists all topics + with defined help_ functions, broken into up to three topics; documented + commands, miscellaneous help topics, and undocumented commands. +6. The command '?' is a synonym for `help'. The command '!' is a synonym + for `shell', if a do_shell method exists. +7. If completion is enabled, completing commands will be done automatically, + and completing of commands args is done by calling complete_foo() with + arguments text, line, begidx, endidx. text is string we are matching + against, all returned matches must begin with it. line is the current + input line (lstripped), begidx and endidx are the beginning and end + indexes of the text being matched, which could be used to provide + different completion depending upon which position the argument is in. + +The `default' method may be overridden to intercept commands for which there +is no do_ method. + +The `completedefault' method may be overridden to intercept completions for +commands that have no complete_ method. + +The data member `self.ruler' sets the character used to draw separator lines +in the help messages. If empty, no ruler line is drawn. It defaults to "=". + +If the value of `self.intro' is nonempty when the cmdloop method is called, +it is printed out on interpreter startup. This value may be overridden +via an optional argument to the cmdloop() method. + +The data members `self.doc_header', `self.misc_header', and +`self.undoc_header' set the headers used for the help function's +listings of documented functions, miscellaneous topics, and undocumented +functions respectively. + +These interpreters use raw_input; thus, if the readline module is loaded, +they automatically support Emacs-like command history and editing features. +""" + +import string + +__all__ = ["Cmd"] + +PROMPT = '(Cmd) ' +IDENTCHARS = string.ascii_letters + string.digits + '_' + +class Cmd: + """A simple framework for writing line-oriented command interpreters. + + These are often useful for test harnesses, administrative tools, and + prototypes that will later be wrapped in a more sophisticated interface. + + A Cmd instance or subclass instance is a line-oriented interpreter + framework. There is no good reason to instantiate Cmd itself; rather, + it's useful as a superclass of an interpreter class you define yourself + in order to inherit Cmd's methods and encapsulate action methods. + + """ + prompt = PROMPT + identchars = IDENTCHARS + ruler = '=' + lastcmd = '' + intro = None + doc_leader = "" + doc_header = "Documented commands (type help ):" + misc_header = "Miscellaneous help topics:" + undoc_header = "Undocumented commands:" + nohelp = "*** No help on %s" + use_rawinput = 1 + + def __init__(self, completekey='tab', stdin=None, stdout=None): + """Instantiate a line-oriented interpreter framework. + + The optional argument 'completekey' is the readline name of a + completion key; it defaults to the Tab key. If completekey is + not None and the readline module is available, command completion + is done automatically. The optional arguments stdin and stdout + specify alternate input and output file objects; if not specified, + sys.stdin and sys.stdout are used. + + """ + import sys + if stdin is not None: + self.stdin = stdin + else: + self.stdin = sys.stdin + if stdout is not None: + self.stdout = stdout + else: + self.stdout = sys.stdout + self.cmdqueue = [] + self.completekey = completekey + + def cmdloop(self, intro=None): + """Repeatedly issue a prompt, accept input, parse an initial prefix + off the received input, and dispatch to action methods, passing them + the remainder of the line as argument. + + """ + + self.preloop() + #if self.use_rawinput and self.completekey: + # try: + # import readline + # self.old_completer = readline.get_completer() + # readline.set_completer(self.complete) + # readline.parse_and_bind(self.completekey+": complete") + # except ImportError: + # pass + #try: + if intro is not None: + self.intro = intro + if self.intro: + self.stdout.write(str(self.intro)+"\n") + stop = None + while not stop: + if self.cmdqueue: + line = self.cmdqueue.pop(0) + else: + if self.use_rawinput: + try: + line = raw_input(self.prompt) + except EOFError: + line = 'EOF' + else: + self.stdout.write(self.prompt) + self.stdout.flush() + line = self.stdin.readline() + if not len(line): + line = 'EOF' + else: + line = line.rstrip('\r\n') + line = self.precmd(line) + stop = self.onecmd(line) + stop = self.postcmd(stop, line) + self.postloop() + # if self.use_rawinput and self.completekey: + # try: + # import readline + # readline.set_completer(self.old_completer) + # except ImportError: + # pass + + + def precmd(self, line): + """Hook method executed just before the command line is + interpreted, but after the input prompt is generated and issued. + + """ + return line + + def postcmd(self, stop, line): + """Hook method executed just after a command dispatch is finished.""" + return stop + + def preloop(self): + """Hook method executed once when the cmdloop() method is called.""" + pass + + def postloop(self): + """Hook method executed once when the cmdloop() method is about to + return. + + """ + pass + + def parseline(self, line): + """Parse the line into a command name and a string containing + the arguments. Returns a tuple containing (command, args, line). + 'command' and 'args' may be None if the line couldn't be parsed. + """ + line = line.strip() + if not line: + return None, None, line + elif line[0] == '?': + line = 'help ' + line[1:] + elif line[0] == '!': + if hasattr(self, 'do_shell'): + line = 'shell ' + line[1:] + else: + return None, None, line + i, n = 0, len(line) + while i < n and line[i] in self.identchars: i = i+1 + cmd, arg = line[:i], line[i:].strip() + return cmd, arg, line + + def onecmd(self, line): + """Interpret the argument as though it had been typed in response + to the prompt. + + This may be overridden, but should not normally need to be; + see the precmd() and postcmd() methods for useful execution hooks. + The return value is a flag indicating whether interpretation of + commands by the interpreter should stop. + + """ + cmd, arg, line = self.parseline(line) + if not line: + return self.emptyline() + if cmd is None: + return self.default(line) + self.lastcmd = line + if line == 'EOF' : + self.lastcmd = '' + if cmd == '': + return self.default(line) + else: + try: + func = getattr(self, 'do_' + cmd) + except AttributeError: + return self.default(line) + return func(arg) + + def emptyline(self): + """Called when an empty line is entered in response to the prompt. + + If this method is not overridden, it repeats the last nonempty + command entered. + + """ + if self.lastcmd: + return self.onecmd(self.lastcmd) + + def default(self, line): + """Called on an input line when the command prefix is not recognized. + + If this method is not overridden, it prints an error message and + returns. + + """ + self.stdout.write('*** Unknown syntax: %s\n'%line) + + def completedefault(self, *ignored): + """Method called to complete an input line when no command-specific + complete_*() method is available. + + By default, it returns an empty list. + + """ + return [] + + def completenames(self, text, *ignored): + dotext = 'do_'+text + return [a[3:] for a in self.get_names() if a.startswith(dotext)] + + def complete(self, text, state): + """Return the next possible completion for 'text'. + + If a command has not been entered, then complete against command list. + Otherwise try to call complete_ to get list of completions. + """ + #if state == 0: + # import readline + # origline = readline.get_line_buffer() + # line = origline.lstrip() + # stripped = len(origline) - len(line) + # begidx = readline.get_begidx() - stripped + # endidx = readline.get_endidx() - stripped + # if begidx>0: + # cmd, args, foo = self.parseline(line) + # if cmd == '': + # compfunc = self.completedefault + # else: + # try: + # compfunc = getattr(self, 'complete_' + cmd) + # except AttributeError: + # compfunc = self.completedefault + # else: + # compfunc = self.completenames + # self.completion_matches = compfunc(text, line, begidx, endidx) + #try: + # return self.completion_matches[state] + #except IndexError: + # return None + + def get_names(self): + # This method used to pull in base class attributes + # at a time dir() didn't do it yet. + return dir(self.__class__) + + def complete_help(self, *args): + commands = set(self.completenames(*args)) + topics = set(a[5:] for a in self.get_names() + if a.startswith('help_' + args[0])) + return list(commands | topics) + + def do_help(self, arg): + 'List available commands with "help" or detailed help with "help cmd".' + if arg: + # XXX check arg syntax + try: + func = getattr(self, 'help_' + arg) + except AttributeError: + try: + doc=getattr(self, 'do_' + arg).__doc__ + if doc: + self.stdout.write("%s\n"%str(doc)) + return + except AttributeError: + pass + self.stdout.write("%s\n"%str(self.nohelp % (arg,))) + return + func() + else: + names = self.get_names() + cmds_doc = [] + cmds_undoc = [] + help = {} + for name in names: + if name[:5] == 'help_': + help[name[5:]]=1 + names.sort() + # There can be duplicates if routines overridden + prevname = '' + for name in names: + if name[:3] == 'do_': + if name == prevname: + continue + prevname = name + cmd=name[3:] + if cmd in help: + cmds_doc.append(cmd) + del help[cmd] + elif getattr(self, name).__doc__: + cmds_doc.append(cmd) + else: + cmds_undoc.append(cmd) + self.stdout.write("%s\n"%str(self.doc_leader)) + self.print_topics(self.doc_header, cmds_doc, 15,80) + self.print_topics(self.misc_header, help.keys(),15,80) + self.print_topics(self.undoc_header, cmds_undoc, 15,80) + + def print_topics(self, header, cmds, cmdlen, maxcol): + if cmds: + self.stdout.write("%s\n"%str(header)) + if self.ruler: + self.stdout.write("%s\n"%str(self.ruler * len(header))) + self.columnize(cmds, maxcol-1) + self.stdout.write("\n") + + def columnize(self, list, displaywidth=80): + """Display a list of strings as a compact set of columns. + + Each column is only as wide as necessary. + Columns are separated by two spaces (one was not legible enough). + """ + if not list: + self.stdout.write("\n") + return + nonstrings = [i for i in range(len(list)) + if not isinstance(list[i], str)] + if nonstrings: + raise TypeError, ("list[i] not a string for i in %s" % + ", ".join(map(str, nonstrings))) + size = len(list) + if size == 1: + self.stdout.write('%s\n'%str(list[0])) + return + # Try every row count from 1 upwards + for nrows in range(1, len(list)): + ncols = (size+nrows-1) // nrows + colwidths = [] + totwidth = -2 + for col in range(ncols): + colwidth = 0 + for row in range(nrows): + i = row + nrows*col + if i >= size: + break + x = list[i] + colwidth = max(colwidth, len(x)) + colwidths.append(colwidth) + totwidth += colwidth + 2 + if totwidth > displaywidth: + break + if totwidth <= displaywidth: + break + else: + nrows = len(list) + ncols = 1 + colwidths = [0] + for row in range(nrows): + texts = [] + for col in range(ncols): + i = row + nrows*col + if i >= size: + x = "" + else: + x = list[i] + texts.append(x) + while texts and not texts[-1]: + del texts[-1] + for col in range(len(texts)): + texts[col] = texts[col].ljust(colwidths[col]) + self.stdout.write("%s\n"%str(" ".join(texts))) diff --git a/third_party/stdlib/codecs.py b/third_party/stdlib/codecs.py new file mode 100644 index 00000000..3d9be35c --- /dev/null +++ b/third_party/stdlib/codecs.py @@ -0,0 +1,1113 @@ +""" codecs -- Python Codec Registry, API and helpers. + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +"""#" + +import __builtin__, sys + +### Registry and builtin stateless codec functions + +try: + from _codecs import * +except ImportError, why: + raise SystemError('Failed to load the builtin codecs: %s' % why) + +__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE", + "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE", + "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE", + "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE", + "CodecInfo", "Codec", "IncrementalEncoder", "IncrementalDecoder", + "StreamReader", "StreamWriter", + "StreamReaderWriter", "StreamRecoder", + "getencoder", "getdecoder", "getincrementalencoder", + "getincrementaldecoder", "getreader", "getwriter", + "encode", "decode", "iterencode", "iterdecode", + "strict_errors", "ignore_errors", "replace_errors", + "xmlcharrefreplace_errors", "backslashreplace_errors", + "register_error", "lookup_error"] + +### Constants + +# +# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF) +# and its possible byte string values +# for UTF8/UTF16/UTF32 output and little/big endian machines +# + +# UTF-8 +BOM_UTF8 = '\xef\xbb\xbf' + +# UTF-16, little endian +BOM_LE = BOM_UTF16_LE = '\xff\xfe' + +# UTF-16, big endian +BOM_BE = BOM_UTF16_BE = '\xfe\xff' + +# UTF-32, little endian +BOM_UTF32_LE = '\xff\xfe\x00\x00' + +# UTF-32, big endian +BOM_UTF32_BE = '\x00\x00\xfe\xff' + +if sys.byteorder == 'little': + + # UTF-16, native endianness + BOM = BOM_UTF16 = BOM_UTF16_LE + + # UTF-32, native endianness + BOM_UTF32 = BOM_UTF32_LE + +else: + + # UTF-16, native endianness + BOM = BOM_UTF16 = BOM_UTF16_BE + + # UTF-32, native endianness + BOM_UTF32 = BOM_UTF32_BE + +# Old broken names (don't use in new code) +BOM32_LE = BOM_UTF16_LE +BOM32_BE = BOM_UTF16_BE +BOM64_LE = BOM_UTF32_LE +BOM64_BE = BOM_UTF32_BE + + +### Codec base classes (defining the API) + +class CodecInfo(tuple): + """Codec details when looking up the codec registry""" + + # Private API to allow Python to blacklist the known non-Unicode + # codecs in the standard library. A more general mechanism to + # reliably distinguish test encodings from other codecs will hopefully + # be defined for Python 3.5 + # + # See http://bugs.python.org/issue19619 + _is_text_encoding = True # Assume codecs are text encodings by default + + def __new__(cls, encode, decode, streamreader=None, streamwriter=None, + incrementalencoder=None, incrementaldecoder=None, name=None, + _is_text_encoding=None): + self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter)) + self.name = name + self.encode = encode + self.decode = decode + self.incrementalencoder = incrementalencoder + self.incrementaldecoder = incrementaldecoder + self.streamwriter = streamwriter + self.streamreader = streamreader + if _is_text_encoding is not None: + self._is_text_encoding = _is_text_encoding + return self + + def __repr__(self): + return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self)) + +class Codec: + + """ Defines the interface for stateless encoders/decoders. + + The .encode()/.decode() methods may use different error + handling schemes by providing the errors argument. These + string values are predefined: + + 'strict' - raise a ValueError error (or a subclass) + 'ignore' - ignore the character and continue with the next + 'replace' - replace with a suitable replacement character; + Python will use the official U+FFFD REPLACEMENT + CHARACTER for the builtin Unicode codecs on + decoding and '?' on encoding. + 'xmlcharrefreplace' - Replace with the appropriate XML + character reference (only for encoding). + 'backslashreplace' - Replace with backslashed escape sequences + (only for encoding). + + The set of allowed values can be extended via register_error. + + """ + def encode(self, input, errors='strict'): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling. + + The method may not store state in the Codec instance. Use + StreamWriter for codecs which have to keep state in order to + make encoding efficient. + + The encoder must be able to handle zero length input and + return an empty object of the output object type in this + situation. + + """ + raise NotImplementedError + + def decode(self, input, errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling. + + The method may not store state in the Codec instance. Use + StreamReader for codecs which have to keep state in order to + make decoding efficient. + + The decoder must be able to handle zero length input and + return an empty object of the output object type in this + situation. + + """ + raise NotImplementedError + +class IncrementalEncoder(object): + """ + An IncrementalEncoder encodes an input in multiple steps. The input can be + passed piece by piece to the encode() method. The IncrementalEncoder remembers + the state of the Encoding process between calls to encode(). + """ + def __init__(self, errors='strict'): + """ + Creates an IncrementalEncoder instance. + + The IncrementalEncoder may use different error handling schemes by + providing the errors keyword argument. See the module docstring + for a list of possible values. + """ + self.errors = errors + self.buffer = "" + + def encode(self, input, final=False): + """ + Encodes input and returns the resulting object. + """ + raise NotImplementedError + + def reset(self): + """ + Resets the encoder to the initial state. + """ + + def getstate(self): + """ + Return the current state of the encoder. + """ + return 0 + + def setstate(self, state): + """ + Set the current state of the encoder. state must have been + returned by getstate(). + """ + +class BufferedIncrementalEncoder(IncrementalEncoder): + """ + This subclass of IncrementalEncoder can be used as the baseclass for an + incremental encoder if the encoder must keep some of the output in a + buffer between calls to encode(). + """ + def __init__(self, errors='strict'): + IncrementalEncoder.__init__(self, errors) + self.buffer = "" # unencoded input that is kept between calls to encode() + + def _buffer_encode(self, input, errors, final): + # Overwrite this method in subclasses: It must encode input + # and return an (output, length consumed) tuple + raise NotImplementedError + + def encode(self, input, final=False): + # encode input (taking the buffer into account) + data = self.buffer + input + (result, consumed) = self._buffer_encode(data, self.errors, final) + # keep unencoded input until the next call + self.buffer = data[consumed:] + return result + + def reset(self): + IncrementalEncoder.reset(self) + self.buffer = "" + + def getstate(self): + return self.buffer or 0 + + def setstate(self, state): + self.buffer = state or "" + +class IncrementalDecoder(object): + """ + An IncrementalDecoder decodes an input in multiple steps. The input can be + passed piece by piece to the decode() method. The IncrementalDecoder + remembers the state of the decoding process between calls to decode(). + """ + def __init__(self, errors='strict'): + """ + Creates a IncrementalDecoder instance. + + The IncrementalDecoder may use different error handling schemes by + providing the errors keyword argument. See the module docstring + for a list of possible values. + """ + self.errors = errors + + def decode(self, input, final=False): + """ + Decodes input and returns the resulting object. + """ + raise NotImplementedError + + def reset(self): + """ + Resets the decoder to the initial state. + """ + + def getstate(self): + """ + Return the current state of the decoder. + + This must be a (buffered_input, additional_state_info) tuple. + buffered_input must be a bytes object containing bytes that + were passed to decode() that have not yet been converted. + additional_state_info must be a non-negative integer + representing the state of the decoder WITHOUT yet having + processed the contents of buffered_input. In the initial state + and after reset(), getstate() must return (b"", 0). + """ + return (b"", 0) + + def setstate(self, state): + """ + Set the current state of the decoder. + + state must have been returned by getstate(). The effect of + setstate((b"", 0)) must be equivalent to reset(). + """ + +class BufferedIncrementalDecoder(IncrementalDecoder): + """ + This subclass of IncrementalDecoder can be used as the baseclass for an + incremental decoder if the decoder must be able to handle incomplete byte + sequences. + """ + def __init__(self, errors='strict'): + IncrementalDecoder.__init__(self, errors) + self.buffer = "" # undecoded input that is kept between calls to decode() + + def _buffer_decode(self, input, errors, final): + # Overwrite this method in subclasses: It must decode input + # and return an (output, length consumed) tuple + raise NotImplementedError + + def decode(self, input, final=False): + # decode input (taking the buffer into account) + data = self.buffer + input + (result, consumed) = self._buffer_decode(data, self.errors, final) + # keep undecoded input until the next call + self.buffer = data[consumed:] + return result + + def reset(self): + IncrementalDecoder.reset(self) + self.buffer = "" + + def getstate(self): + # additional state info is always 0 + return (self.buffer, 0) + + def setstate(self, state): + # ignore additional state info + self.buffer = state[0] + +# +# The StreamWriter and StreamReader class provide generic working +# interfaces which can be used to implement new encoding submodules +# very easily. See encodings/utf_8.py for an example on how this is +# done. +# + +class StreamWriter(Codec): + + def __init__(self, stream, errors='strict'): + + """ Creates a StreamWriter instance. + + stream must be a file-like object open for writing + (binary) data. + + The StreamWriter may use different error handling + schemes by providing the errors keyword argument. These + parameters are predefined: + + 'strict' - raise a ValueError (or a subclass) + 'ignore' - ignore the character and continue with the next + 'replace'- replace with a suitable replacement character + 'xmlcharrefreplace' - Replace with the appropriate XML + character reference. + 'backslashreplace' - Replace with backslashed escape + sequences (only for encoding). + + The set of allowed parameter values can be extended via + register_error. + """ + self.stream = stream + self.errors = errors + + def write(self, object): + + """ Writes the object's contents encoded to self.stream. + """ + data, consumed = self.encode(object, self.errors) + self.stream.write(data) + + def writelines(self, list): + + """ Writes the concatenated list of strings to the stream + using .write(). + """ + self.write(''.join(list)) + + def reset(self): + + """ Flushes and resets the codec buffers used for keeping state. + + Calling this method should ensure that the data on the + output is put into a clean state, that allows appending + of new fresh data without having to rescan the whole + stream to recover state. + + """ + pass + + def seek(self, offset, whence=0): + self.stream.seek(offset, whence) + if whence == 0 and offset == 0: + self.reset() + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### + +class StreamReader(Codec): + + def __init__(self, stream, errors='strict'): + + """ Creates a StreamReader instance. + + stream must be a file-like object open for reading + (binary) data. + + The StreamReader may use different error handling + schemes by providing the errors keyword argument. These + parameters are predefined: + + 'strict' - raise a ValueError (or a subclass) + 'ignore' - ignore the character and continue with the next + 'replace'- replace with a suitable replacement character; + + The set of allowed parameter values can be extended via + register_error. + """ + self.stream = stream + self.errors = errors + self.bytebuffer = "" + # For str->str decoding this will stay a str + # For str->unicode decoding the first read will promote it to unicode + self.charbuffer = "" + self.linebuffer = None + + def decode(self, input, errors='strict'): + raise NotImplementedError + + def read(self, size=-1, chars=-1, firstline=False): + + """ Decodes data from the stream self.stream and returns the + resulting object. + + chars indicates the number of characters to read from the + stream. read() will never return more than chars + characters, but it might return less, if there are not enough + characters available. + + size indicates the approximate maximum number of bytes to + read from the stream for decoding purposes. The decoder + can modify this setting as appropriate. The default value + -1 indicates to read and decode as much as possible. size + is intended to prevent having to decode huge files in one + step. + + If firstline is true, and a UnicodeDecodeError happens + after the first line terminator in the input only the first line + will be returned, the rest of the input will be kept until the + next call to read(). + + The method should use a greedy read strategy meaning that + it should read as much data as is allowed within the + definition of the encoding and the given size, e.g. if + optional encoding endings or state markers are available + on the stream, these should be read too. + """ + # If we have lines cached, first merge them back into characters + if self.linebuffer: + self.charbuffer = "".join(self.linebuffer) + self.linebuffer = None + + # read until we get the required number of characters (if available) + while True: + # can the request be satisfied from the character buffer? + if chars >= 0: + if len(self.charbuffer) >= chars: + break + elif size >= 0: + if len(self.charbuffer) >= size: + break + # we need more data + if size < 0: + newdata = self.stream.read() + else: + newdata = self.stream.read(size) + # decode bytes (those remaining from the last call included) + data = self.bytebuffer + newdata + try: + newchars, decodedbytes = self.decode(data, self.errors) + except UnicodeDecodeError, exc: + if firstline: + newchars, decodedbytes = self.decode(data[:exc.start], self.errors) + lines = newchars.splitlines(True) + if len(lines)<=1: + raise + else: + raise + # keep undecoded bytes until the next call + self.bytebuffer = data[decodedbytes:] + # put new characters in the character buffer + self.charbuffer += newchars + # there was no data available + if not newdata: + break + if chars < 0: + # Return everything we've got + result = self.charbuffer + self.charbuffer = "" + else: + # Return the first chars characters + result = self.charbuffer[:chars] + self.charbuffer = self.charbuffer[chars:] + return result + + def readline(self, size=None, keepends=True): + + """ Read one line from the input stream and return the + decoded data. + + size, if given, is passed as size argument to the + read() method. + + """ + # If we have lines cached from an earlier read, return + # them unconditionally + if self.linebuffer: + line = self.linebuffer[0] + del self.linebuffer[0] + if len(self.linebuffer) == 1: + # revert to charbuffer mode; we might need more data + # next time + self.charbuffer = self.linebuffer[0] + self.linebuffer = None + if not keepends: + line = line.splitlines(False)[0] + return line + + readsize = size or 72 + line = "" + # If size is given, we call read() only once + while True: + data = self.read(readsize, firstline=True) + if data: + # If we're at a "\r" read one extra character (which might + # be a "\n") to get a proper line ending. If the stream is + # temporarily exhausted we return the wrong line ending. + if data.endswith("\r"): + data += self.read(size=1, chars=1) + + line += data + lines = line.splitlines(True) + if lines: + if len(lines) > 1: + # More than one line result; the first line is a full line + # to return + line = lines[0] + del lines[0] + if len(lines) > 1: + # cache the remaining lines + lines[-1] += self.charbuffer + self.linebuffer = lines + self.charbuffer = None + else: + # only one remaining line, put it back into charbuffer + self.charbuffer = lines[0] + self.charbuffer + if not keepends: + line = line.splitlines(False)[0] + break + line0withend = lines[0] + line0withoutend = lines[0].splitlines(False)[0] + if line0withend != line0withoutend: # We really have a line end + # Put the rest back together and keep it until the next call + self.charbuffer = "".join(lines[1:]) + self.charbuffer + if keepends: + line = line0withend + else: + line = line0withoutend + break + # we didn't get anything or this was our only try + if not data or size is not None: + if line and not keepends: + line = line.splitlines(False)[0] + break + if readsize<8000: + readsize *= 2 + return line + + def readlines(self, sizehint=None, keepends=True): + + """ Read all lines available on the input stream + and return them as list of lines. + + Line breaks are implemented using the codec's decoder + method and are included in the list entries. + + sizehint, if given, is ignored since there is no efficient + way to finding the true end-of-line. + + """ + data = self.read() + return data.splitlines(keepends) + + def reset(self): + + """ Resets the codec buffers used for keeping state. + + Note that no stream repositioning should take place. + This method is primarily intended to be able to recover + from decoding errors. + + """ + self.bytebuffer = "" + self.charbuffer = u"" + self.linebuffer = None + + def seek(self, offset, whence=0): + """ Set the input stream's current position. + + Resets the codec buffers used for keeping state. + """ + self.stream.seek(offset, whence) + self.reset() + + def next(self): + + """ Return the next decoded line from the input stream.""" + line = self.readline() + if line: + return line + raise StopIteration + + def __iter__(self): + return self + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### + +class StreamReaderWriter: + + """ StreamReaderWriter instances allow wrapping streams which + work in both read and write modes. + + The design is such that one can use the factory functions + returned by the codec.lookup() function to construct the + instance. + + """ + # Optional attributes set by the file wrappers below + encoding = 'unknown' + + def __init__(self, stream, Reader, Writer, errors='strict'): + + """ Creates a StreamReaderWriter instance. + + stream must be a Stream-like object. + + Reader, Writer must be factory functions or classes + providing the StreamReader, StreamWriter interface resp. + + Error handling is done in the same way as defined for the + StreamWriter/Readers. + + """ + self.stream = stream + self.reader = Reader(stream, errors) + self.writer = Writer(stream, errors) + self.errors = errors + + def read(self, size=-1): + + return self.reader.read(size) + + def readline(self, size=None): + + return self.reader.readline(size) + + def readlines(self, sizehint=None): + + return self.reader.readlines(sizehint) + + def next(self): + + """ Return the next decoded line from the input stream.""" + return self.reader.next() + + def __iter__(self): + return self + + def write(self, data): + + return self.writer.write(data) + + def writelines(self, list): + + return self.writer.writelines(list) + + def reset(self): + + self.reader.reset() + self.writer.reset() + + def seek(self, offset, whence=0): + self.stream.seek(offset, whence) + self.reader.reset() + if whence == 0 and offset == 0: + self.writer.reset() + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + # these are needed to make "with codecs.open(...)" work properly + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### + +class StreamRecoder: + + """ StreamRecoder instances provide a frontend - backend + view of encoding data. + + They use the complete set of APIs returned by the + codecs.lookup() function to implement their task. + + Data written to the stream is first decoded into an + intermediate format (which is dependent on the given codec + combination) and then written to the stream using an instance + of the provided Writer class. + + In the other direction, data is read from the stream using a + Reader instance and then return encoded data to the caller. + + """ + # Optional attributes set by the file wrappers below + data_encoding = 'unknown' + file_encoding = 'unknown' + + def __init__(self, stream, encode, decode, Reader, Writer, + errors='strict'): + + """ Creates a StreamRecoder instance which implements a two-way + conversion: encode and decode work on the frontend (the + input to .read() and output of .write()) while + Reader and Writer work on the backend (reading and + writing to the stream). + + You can use these objects to do transparent direct + recodings from e.g. latin-1 to utf-8 and back. + + stream must be a file-like object. + + encode, decode must adhere to the Codec interface, Reader, + Writer must be factory functions or classes providing the + StreamReader, StreamWriter interface resp. + + encode and decode are needed for the frontend translation, + Reader and Writer for the backend translation. Unicode is + used as intermediate encoding. + + Error handling is done in the same way as defined for the + StreamWriter/Readers. + + """ + self.stream = stream + self.encode = encode + self.decode = decode + self.reader = Reader(stream, errors) + self.writer = Writer(stream, errors) + self.errors = errors + + def read(self, size=-1): + + data = self.reader.read(size) + data, bytesencoded = self.encode(data, self.errors) + return data + + def readline(self, size=None): + + if size is None: + data = self.reader.readline() + else: + data = self.reader.readline(size) + data, bytesencoded = self.encode(data, self.errors) + return data + + def readlines(self, sizehint=None): + + data = self.reader.read() + data, bytesencoded = self.encode(data, self.errors) + return data.splitlines(1) + + def next(self): + + """ Return the next decoded line from the input stream.""" + data = self.reader.next() + data, bytesencoded = self.encode(data, self.errors) + return data + + def __iter__(self): + return self + + def write(self, data): + + data, bytesdecoded = self.decode(data, self.errors) + return self.writer.write(data) + + def writelines(self, list): + + data = ''.join(list) + data, bytesdecoded = self.decode(data, self.errors) + return self.writer.write(data) + + def reset(self): + + self.reader.reset() + self.writer.reset() + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### Shortcuts + +def open(filename, mode='rb', encoding=None, errors='strict', buffering=1): + + """ Open an encoded file using the given mode and return + a wrapped version providing transparent encoding/decoding. + + Note: The wrapped version will only accept the object format + defined by the codecs, i.e. Unicode objects for most builtin + codecs. Output is also codec dependent and will usually be + Unicode as well. + + Files are always opened in binary mode, even if no binary mode + was specified. This is done to avoid data loss due to encodings + using 8-bit values. The default file mode is 'rb' meaning to + open the file in binary read mode. + + encoding specifies the encoding which is to be used for the + file. + + errors may be given to define the error handling. It defaults + to 'strict' which causes ValueErrors to be raised in case an + encoding error occurs. + + buffering has the same meaning as for the builtin open() API. + It defaults to line buffered. + + The returned wrapped file object provides an extra attribute + .encoding which allows querying the used encoding. This + attribute is only available if an encoding was specified as + parameter. + + """ + if encoding is not None: + if 'U' in mode: + # No automatic conversion of '\n' is done on reading and writing + mode = mode.strip().replace('U', '') + if mode[:1] not in set('rwa'): + mode = 'r' + mode + if 'b' not in mode: + # Force opening of the file in binary mode + mode = mode + 'b' + file = __builtin__.open(filename, mode, buffering) + if encoding is None: + return file + info = lookup(encoding) + srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors) + # Add attributes to simplify introspection + srw.encoding = encoding + return srw + +def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'): + + """ Return a wrapped version of file which provides transparent + encoding translation. + + Strings written to the wrapped file are interpreted according + to the given data_encoding and then written to the original + file as string using file_encoding. The intermediate encoding + will usually be Unicode but depends on the specified codecs. + + Strings are read from the file using file_encoding and then + passed back to the caller as string using data_encoding. + + If file_encoding is not given, it defaults to data_encoding. + + errors may be given to define the error handling. It defaults + to 'strict' which causes ValueErrors to be raised in case an + encoding error occurs. + + The returned wrapped file object provides two extra attributes + .data_encoding and .file_encoding which reflect the given + parameters of the same name. The attributes can be used for + introspection by Python programs. + + """ + if file_encoding is None: + file_encoding = data_encoding + data_info = lookup(data_encoding) + file_info = lookup(file_encoding) + sr = StreamRecoder(file, data_info.encode, data_info.decode, + file_info.streamreader, file_info.streamwriter, errors) + # Add attributes to simplify introspection + sr.data_encoding = data_encoding + sr.file_encoding = file_encoding + return sr + +### Helpers for codec lookup + +def getencoder(encoding): + + """ Lookup up the codec for the given encoding and return + its encoder function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).encode + +def getdecoder(encoding): + + """ Lookup up the codec for the given encoding and return + its decoder function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).decode + +def getincrementalencoder(encoding): + + """ Lookup up the codec for the given encoding and return + its IncrementalEncoder class or factory function. + + Raises a LookupError in case the encoding cannot be found + or the codecs doesn't provide an incremental encoder. + + """ + encoder = lookup(encoding).incrementalencoder + if encoder is None: + raise LookupError(encoding) + return encoder + +def getincrementaldecoder(encoding): + + """ Lookup up the codec for the given encoding and return + its IncrementalDecoder class or factory function. + + Raises a LookupError in case the encoding cannot be found + or the codecs doesn't provide an incremental decoder. + + """ + decoder = lookup(encoding).incrementaldecoder + if decoder is None: + raise LookupError(encoding) + return decoder + +def getreader(encoding): + + """ Lookup up the codec for the given encoding and return + its StreamReader class or factory function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).streamreader + +def getwriter(encoding): + + """ Lookup up the codec for the given encoding and return + its StreamWriter class or factory function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).streamwriter + +def iterencode(iterator, encoding, errors='strict', **kwargs): + """ + Encoding iterator. + + Encodes the input strings from the iterator using a IncrementalEncoder. + + errors and kwargs are passed through to the IncrementalEncoder + constructor. + """ + encoder = getincrementalencoder(encoding)(errors, **kwargs) + for input in iterator: + output = encoder.encode(input) + if output: + yield output + output = encoder.encode("", True) + if output: + yield output + +def iterdecode(iterator, encoding, errors='strict', **kwargs): + """ + Decoding iterator. + + Decodes the input strings from the iterator using a IncrementalDecoder. + + errors and kwargs are passed through to the IncrementalDecoder + constructor. + """ + decoder = getincrementaldecoder(encoding)(errors, **kwargs) + for input in iterator: + output = decoder.decode(input) + if output: + yield output + output = decoder.decode("", True) + if output: + yield output + +### Helpers for charmap-based codecs + +def make_identity_dict(rng): + + """ make_identity_dict(rng) -> dict + + Return a dictionary where elements of the rng sequence are + mapped to themselves. + + """ + res = {} + for i in rng: + res[i]=i + return res + +def make_encoding_map(decoding_map): + + """ Creates an encoding map from a decoding map. + + If a target mapping in the decoding map occurs multiple + times, then that target is mapped to None (undefined mapping), + causing an exception when encountered by the charmap codec + during translation. + + One example where this happens is cp875.py which decodes + multiple character to \\u001a. + + """ + m = {} + for k,v in decoding_map.items(): + if not v in m: + m[v] = k + else: + m[v] = None + return m + +### error handlers + +try: + strict_errors = lookup_error("strict") + ignore_errors = lookup_error("ignore") + replace_errors = lookup_error("replace") + xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") + backslashreplace_errors = lookup_error("backslashreplace") +except LookupError: + # In --disable-unicode builds, these error handler are missing + strict_errors = None + ignore_errors = None + replace_errors = None + xmlcharrefreplace_errors = None + backslashreplace_errors = None + +# Tell modulefinder that using codecs probably needs the encodings +# package +_false = 0 +if _false: + import encodings + +### Tests + +if __name__ == '__main__': + + # Make stdout translate Latin-1 output into UTF-8 output + sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8') + + # Have stdin translate Latin-1 input into UTF-8 input + sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1') diff --git a/third_party/stdlib/ctypes/__init__.py b/third_party/stdlib/ctypes/__init__.py new file mode 100644 index 00000000..e24cfd2b --- /dev/null +++ b/third_party/stdlib/ctypes/__init__.py @@ -0,0 +1,552 @@ +"""create and manipulate C data types in Python""" + +import os as _os, sys as _sys + +__version__ = "1.1.0" + +from _ctypes import Union, Structure, Array +from _ctypes import _Pointer +from _ctypes import CFuncPtr as _CFuncPtr +from _ctypes import __version__ as _ctypes_version +from _ctypes import RTLD_LOCAL, RTLD_GLOBAL +from _ctypes import ArgumentError + +from struct import calcsize as _calcsize + +if __version__ != _ctypes_version: + raise Exception("Version number mismatch", __version__, _ctypes_version) + +if _os.name in ("nt", "ce"): + from _ctypes import FormatError + +DEFAULT_MODE = RTLD_LOCAL +if _os.name == "posix" and _sys.platform == "darwin": + # On OS X 10.3, we use RTLD_GLOBAL as default mode + # because RTLD_LOCAL does not work at least on some + # libraries. OS X 10.3 is Darwin 7, so we check for + # that. + + if int(_os.uname()[2].split('.')[0]) < 8: + DEFAULT_MODE = RTLD_GLOBAL + +from _ctypes import FUNCFLAG_CDECL as _FUNCFLAG_CDECL, \ + FUNCFLAG_PYTHONAPI as _FUNCFLAG_PYTHONAPI, \ + FUNCFLAG_USE_ERRNO as _FUNCFLAG_USE_ERRNO, \ + FUNCFLAG_USE_LASTERROR as _FUNCFLAG_USE_LASTERROR + +""" +WINOLEAPI -> HRESULT +WINOLEAPI_(type) + +STDMETHODCALLTYPE + +STDMETHOD(name) +STDMETHOD_(type, name) + +STDAPICALLTYPE +""" + +def create_string_buffer(init, size=None): + """create_string_buffer(aString) -> character array + create_string_buffer(anInteger) -> character array + create_string_buffer(aString, anInteger) -> character array + """ + if isinstance(init, (str, unicode)): + if size is None: + size = len(init)+1 + buftype = c_char * size + buf = buftype() + buf.value = init + return buf + elif isinstance(init, (int, long)): + buftype = c_char * init + buf = buftype() + return buf + raise TypeError(init) + +def c_buffer(init, size=None): +## "deprecated, use create_string_buffer instead" +## import warnings +## warnings.warn("c_buffer is deprecated, use create_string_buffer instead", +## DeprecationWarning, stacklevel=2) + return create_string_buffer(init, size) + +_c_functype_cache = {} +def CFUNCTYPE(restype, *argtypes, **kw): + """CFUNCTYPE(restype, *argtypes, + use_errno=False, use_last_error=False) -> function prototype. + + restype: the result type + argtypes: a sequence specifying the argument types + + The function prototype can be called in different ways to create a + callable object: + + prototype(integer address) -> foreign function + prototype(callable) -> create and return a C callable function from callable + prototype(integer index, method name[, paramflags]) -> foreign function calling a COM method + prototype((ordinal number, dll object)[, paramflags]) -> foreign function exported by ordinal + prototype((function name, dll object)[, paramflags]) -> foreign function exported by name + """ + flags = _FUNCFLAG_CDECL + if kw.pop("use_errno", False): + flags |= _FUNCFLAG_USE_ERRNO + if kw.pop("use_last_error", False): + flags |= _FUNCFLAG_USE_LASTERROR + if kw: + raise ValueError("unexpected keyword argument(s) %s" % kw.keys()) + try: + return _c_functype_cache[(restype, argtypes, flags)] + except KeyError: + class CFunctionType(_CFuncPtr): + _argtypes_ = argtypes + _restype_ = restype + _flags_ = flags + _c_functype_cache[(restype, argtypes, flags)] = CFunctionType + return CFunctionType + +if _os.name in ("nt", "ce"): + from _ctypes import LoadLibrary as _dlopen + from _ctypes import FUNCFLAG_STDCALL as _FUNCFLAG_STDCALL + if _os.name == "ce": + # 'ce' doesn't have the stdcall calling convention + _FUNCFLAG_STDCALL = _FUNCFLAG_CDECL + + _win_functype_cache = {} + def WINFUNCTYPE(restype, *argtypes, **kw): + # docstring set later (very similar to CFUNCTYPE.__doc__) + flags = _FUNCFLAG_STDCALL + if kw.pop("use_errno", False): + flags |= _FUNCFLAG_USE_ERRNO + if kw.pop("use_last_error", False): + flags |= _FUNCFLAG_USE_LASTERROR + if kw: + raise ValueError("unexpected keyword argument(s) %s" % kw.keys()) + try: + return _win_functype_cache[(restype, argtypes, flags)] + except KeyError: + class WinFunctionType(_CFuncPtr): + _argtypes_ = argtypes + _restype_ = restype + _flags_ = flags + _win_functype_cache[(restype, argtypes, flags)] = WinFunctionType + return WinFunctionType + if WINFUNCTYPE.__doc__: + WINFUNCTYPE.__doc__ = CFUNCTYPE.__doc__.replace("CFUNCTYPE", "WINFUNCTYPE") + +elif _os.name == "posix": + from _ctypes import dlopen as _dlopen + +from _ctypes import sizeof, byref, addressof, alignment, resize +from _ctypes import get_errno, set_errno +from _ctypes import _SimpleCData + +def _check_size(typ, typecode=None): + # Check if sizeof(ctypes_type) against struct.calcsize. This + # should protect somewhat against a misconfigured libffi. + from struct import calcsize + if typecode is None: + # Most _type_ codes are the same as used in struct + typecode = typ._type_ + actual, required = sizeof(typ), calcsize(typecode) + if actual != required: + raise SystemError("sizeof(%s) wrong: %d instead of %d" % \ + (typ, actual, required)) + +class py_object(_SimpleCData): + _type_ = "O" + def __repr__(self): + try: + return super(py_object, self).__repr__() + except ValueError: + return "%s()" % type(self).__name__ +_check_size(py_object, "P") + +class c_short(_SimpleCData): + _type_ = "h" +_check_size(c_short) + +class c_ushort(_SimpleCData): + _type_ = "H" +_check_size(c_ushort) + +class c_long(_SimpleCData): + _type_ = "l" +_check_size(c_long) + +class c_ulong(_SimpleCData): + _type_ = "L" +_check_size(c_ulong) + +if _calcsize("i") == _calcsize("l"): + # if int and long have the same size, make c_int an alias for c_long + c_int = c_long + c_uint = c_ulong +else: + class c_int(_SimpleCData): + _type_ = "i" + _check_size(c_int) + + class c_uint(_SimpleCData): + _type_ = "I" + _check_size(c_uint) + +class c_float(_SimpleCData): + _type_ = "f" +_check_size(c_float) + +class c_double(_SimpleCData): + _type_ = "d" +_check_size(c_double) + +class c_longdouble(_SimpleCData): + _type_ = "g" +if sizeof(c_longdouble) == sizeof(c_double): + c_longdouble = c_double + +if _calcsize("l") == _calcsize("q"): + # if long and long long have the same size, make c_longlong an alias for c_long + c_longlong = c_long + c_ulonglong = c_ulong +else: + class c_longlong(_SimpleCData): + _type_ = "q" + _check_size(c_longlong) + + class c_ulonglong(_SimpleCData): + _type_ = "Q" + ## def from_param(cls, val): + ## return ('d', float(val), val) + ## from_param = classmethod(from_param) + _check_size(c_ulonglong) + +class c_ubyte(_SimpleCData): + _type_ = "B" +c_ubyte.__ctype_le__ = c_ubyte.__ctype_be__ = c_ubyte +# backward compatibility: +##c_uchar = c_ubyte +_check_size(c_ubyte) + +class c_byte(_SimpleCData): + _type_ = "b" +c_byte.__ctype_le__ = c_byte.__ctype_be__ = c_byte +_check_size(c_byte) + +class c_char(_SimpleCData): + _type_ = "c" +c_char.__ctype_le__ = c_char.__ctype_be__ = c_char +_check_size(c_char) + +class c_char_p(_SimpleCData): + _type_ = "z" + if _os.name == "nt": + def __repr__(self): + if not windll.kernel32.IsBadStringPtrA(self, -1): + return "%s(%r)" % (self.__class__.__name__, self.value) + return "%s(%s)" % (self.__class__.__name__, cast(self, c_void_p).value) + else: + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, cast(self, c_void_p).value) +_check_size(c_char_p, "P") + +class c_void_p(_SimpleCData): + _type_ = "P" +c_voidp = c_void_p # backwards compatibility (to a bug) +_check_size(c_void_p) + +class c_bool(_SimpleCData): + _type_ = "?" + +from _ctypes import POINTER, pointer, _pointer_type_cache + +def _reset_cache(): + _pointer_type_cache.clear() + _c_functype_cache.clear() + if _os.name in ("nt", "ce"): + _win_functype_cache.clear() + # _SimpleCData.c_wchar_p_from_param + POINTER(c_wchar).from_param = c_wchar_p.from_param + # _SimpleCData.c_char_p_from_param + POINTER(c_char).from_param = c_char_p.from_param + _pointer_type_cache[None] = c_void_p + # XXX for whatever reasons, creating the first instance of a callback + # function is needed for the unittests on Win64 to succeed. This MAY + # be a compiler bug, since the problem occurs only when _ctypes is + # compiled with the MS SDK compiler. Or an uninitialized variable? + CFUNCTYPE(c_int)(lambda: None) + +try: + from _ctypes import set_conversion_mode +except ImportError: + pass +else: + if _os.name in ("nt", "ce"): + set_conversion_mode("mbcs", "ignore") + else: + set_conversion_mode("ascii", "strict") + + class c_wchar_p(_SimpleCData): + _type_ = "Z" + + class c_wchar(_SimpleCData): + _type_ = "u" + + def create_unicode_buffer(init, size=None): + """create_unicode_buffer(aString) -> character array + create_unicode_buffer(anInteger) -> character array + create_unicode_buffer(aString, anInteger) -> character array + """ + if isinstance(init, (str, unicode)): + if size is None: + size = len(init)+1 + buftype = c_wchar * size + buf = buftype() + buf.value = init + return buf + elif isinstance(init, (int, long)): + buftype = c_wchar * init + buf = buftype() + return buf + raise TypeError(init) + +# XXX Deprecated +def SetPointerType(pointer, cls): + if _pointer_type_cache.get(cls, None) is not None: + raise RuntimeError("This type already exists in the cache") + if id(pointer) not in _pointer_type_cache: + raise RuntimeError("What's this???") + pointer.set_type(cls) + _pointer_type_cache[cls] = pointer + del _pointer_type_cache[id(pointer)] + +# XXX Deprecated +def ARRAY(typ, len): + return typ * len + +################################################################ + + +class CDLL(object): + """An instance of this class represents a loaded dll/shared + library, exporting functions using the standard C calling + convention (named 'cdecl' on Windows). + + The exported functions can be accessed as attributes, or by + indexing with the function name. Examples: + + .qsort -> callable object + ['qsort'] -> callable object + + Calling the functions releases the Python GIL during the call and + reacquires it afterwards. + """ + _func_flags_ = _FUNCFLAG_CDECL + _func_restype_ = c_int + + def __init__(self, name, mode=DEFAULT_MODE, handle=None, + use_errno=False, + use_last_error=False): + self._name = name + flags = self._func_flags_ + if use_errno: + flags |= _FUNCFLAG_USE_ERRNO + if use_last_error: + flags |= _FUNCFLAG_USE_LASTERROR + + class _FuncPtr(_CFuncPtr): + _flags_ = flags + _restype_ = self._func_restype_ + self._FuncPtr = _FuncPtr + + if handle is None: + self._handle = _dlopen(self._name, mode) + else: + self._handle = handle + + def __repr__(self): + return "<%s '%s', handle %x at %x>" % \ + (self.__class__.__name__, self._name, + (self._handle & (_sys.maxint*2 + 1)), + id(self) & (_sys.maxint*2 + 1)) + + def __getattr__(self, name): + if name.startswith('__') and name.endswith('__'): + raise AttributeError(name) + func = self.__getitem__(name) + setattr(self, name, func) + return func + + def __getitem__(self, name_or_ordinal): + func = self._FuncPtr((name_or_ordinal, self)) + if not isinstance(name_or_ordinal, (int, long)): + func.__name__ = name_or_ordinal + return func + +class PyDLL(CDLL): + """This class represents the Python library itself. It allows + accessing Python API functions. The GIL is not released, and + Python exceptions are handled correctly. + """ + _func_flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI + +if _os.name in ("nt", "ce"): + + class WinDLL(CDLL): + """This class represents a dll exporting functions using the + Windows stdcall calling convention. + """ + _func_flags_ = _FUNCFLAG_STDCALL + + # XXX Hm, what about HRESULT as normal parameter? + # Mustn't it derive from c_long then? + from _ctypes import _check_HRESULT, _SimpleCData + class HRESULT(_SimpleCData): + _type_ = "l" + # _check_retval_ is called with the function's result when it + # is used as restype. It checks for the FAILED bit, and + # raises a WindowsError if it is set. + # + # The _check_retval_ method is implemented in C, so that the + # method definition itself is not included in the traceback + # when it raises an error - that is what we want (and Python + # doesn't have a way to raise an exception in the caller's + # frame). + _check_retval_ = _check_HRESULT + + class OleDLL(CDLL): + """This class represents a dll exporting functions using the + Windows stdcall calling convention, and returning HRESULT. + HRESULT error values are automatically raised as WindowsError + exceptions. + """ + _func_flags_ = _FUNCFLAG_STDCALL + _func_restype_ = HRESULT + +class LibraryLoader(object): + def __init__(self, dlltype): + self._dlltype = dlltype + + def __getattr__(self, name): + if name[0] == '_': + raise AttributeError(name) + dll = self._dlltype(name) + setattr(self, name, dll) + return dll + + def __getitem__(self, name): + return getattr(self, name) + + def LoadLibrary(self, name): + return self._dlltype(name) + +cdll = LibraryLoader(CDLL) +pydll = LibraryLoader(PyDLL) + +if _os.name in ("nt", "ce"): + pythonapi = PyDLL("python dll", None, _sys.dllhandle) +elif _sys.platform == "cygwin": + pythonapi = PyDLL("libpython%d.%d.dll" % _sys.version_info[:2]) +else: + pythonapi = PyDLL(None) + + +if _os.name in ("nt", "ce"): + windll = LibraryLoader(WinDLL) + oledll = LibraryLoader(OleDLL) + + if _os.name == "nt": + GetLastError = windll.kernel32.GetLastError + else: + GetLastError = windll.coredll.GetLastError + from _ctypes import get_last_error, set_last_error + + def WinError(code=None, descr=None): + if code is None: + code = GetLastError() + if descr is None: + descr = FormatError(code).strip() + return WindowsError(code, descr) + +if sizeof(c_uint) == sizeof(c_void_p): + c_size_t = c_uint + c_ssize_t = c_int +elif sizeof(c_ulong) == sizeof(c_void_p): + c_size_t = c_ulong + c_ssize_t = c_long +elif sizeof(c_ulonglong) == sizeof(c_void_p): + c_size_t = c_ulonglong + c_ssize_t = c_longlong + +# functions + +from _ctypes import _memmove_addr, _memset_addr, _string_at_addr, _cast_addr + +## void *memmove(void *, const void *, size_t); +memmove = CFUNCTYPE(c_void_p, c_void_p, c_void_p, c_size_t)(_memmove_addr) + +## void *memset(void *, int, size_t) +memset = CFUNCTYPE(c_void_p, c_void_p, c_int, c_size_t)(_memset_addr) + +def PYFUNCTYPE(restype, *argtypes): + class CFunctionType(_CFuncPtr): + _argtypes_ = argtypes + _restype_ = restype + _flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI + return CFunctionType + +_cast = PYFUNCTYPE(py_object, c_void_p, py_object, py_object)(_cast_addr) +def cast(obj, typ): + return _cast(obj, obj, typ) + +_string_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_string_at_addr) +def string_at(ptr, size=-1): + """string_at(addr[, size]) -> string + + Return the string at addr.""" + return _string_at(ptr, size) + +try: + from _ctypes import _wstring_at_addr +except ImportError: + pass +else: + _wstring_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_wstring_at_addr) + def wstring_at(ptr, size=-1): + """wstring_at(addr[, size]) -> string + + Return the string at addr.""" + return _wstring_at(ptr, size) + + +if _os.name in ("nt", "ce"): # COM stuff + def DllGetClassObject(rclsid, riid, ppv): + try: + ccom = __import__("comtypes.server.inprocserver", globals(), locals(), ['*']) + except ImportError: + return -2147221231 # CLASS_E_CLASSNOTAVAILABLE + else: + return ccom.DllGetClassObject(rclsid, riid, ppv) + + def DllCanUnloadNow(): + try: + ccom = __import__("comtypes.server.inprocserver", globals(), locals(), ['*']) + except ImportError: + return 0 # S_OK + return ccom.DllCanUnloadNow() + +from ctypes._endian import BigEndianStructure, LittleEndianStructure + +# Fill in specifically-sized types +c_int8 = c_byte +c_uint8 = c_ubyte +for kind in [c_short, c_int, c_long, c_longlong]: + if sizeof(kind) == 2: c_int16 = kind + elif sizeof(kind) == 4: c_int32 = kind + elif sizeof(kind) == 8: c_int64 = kind +for kind in [c_ushort, c_uint, c_ulong, c_ulonglong]: + if sizeof(kind) == 2: c_uint16 = kind + elif sizeof(kind) == 4: c_uint32 = kind + elif sizeof(kind) == 8: c_uint64 = kind +del(kind) + +_reset_cache() diff --git a/third_party/stdlib/ctypes/_endian.py b/third_party/stdlib/ctypes/_endian.py new file mode 100644 index 00000000..c0ba646f --- /dev/null +++ b/third_party/stdlib/ctypes/_endian.py @@ -0,0 +1,61 @@ +import sys +from ctypes import * + +_array_type = type(Array) + +def _other_endian(typ): + """Return the type with the 'other' byte order. Simple types like + c_int and so on already have __ctype_be__ and __ctype_le__ + attributes which contain the types, for more complicated types + arrays and structures are supported. + """ + # check _OTHER_ENDIAN attribute (present if typ is primitive type) + if hasattr(typ, _OTHER_ENDIAN): + return getattr(typ, _OTHER_ENDIAN) + # if typ is array + if isinstance(typ, _array_type): + return _other_endian(typ._type_) * typ._length_ + # if typ is structure + if issubclass(typ, Structure): + return typ + raise TypeError("This type does not support other endian: %s" % typ) + +class _swapped_meta(type(Structure)): + def __setattr__(self, attrname, value): + if attrname == "_fields_": + fields = [] + for desc in value: + name = desc[0] + typ = desc[1] + rest = desc[2:] + fields.append((name, _other_endian(typ)) + rest) + value = fields + super(_swapped_meta, self).__setattr__(attrname, value) + +################################################################ + +# Note: The Structure metaclass checks for the *presence* (not the +# value!) of a _swapped_bytes_ attribute to determine the bit order in +# structures containing bit fields. + +if sys.byteorder == "little": + _OTHER_ENDIAN = "__ctype_be__" + + LittleEndianStructure = Structure + + class BigEndianStructure(Structure): + """Structure with big endian byte order""" + __metaclass__ = _swapped_meta + _swappedbytes_ = None + +elif sys.byteorder == "big": + _OTHER_ENDIAN = "__ctype_le__" + + BigEndianStructure = Structure + class LittleEndianStructure(Structure): + """Structure with little endian byte order""" + __metaclass__ = _swapped_meta + _swappedbytes_ = None + +else: + raise RuntimeError("Invalid byteorder") diff --git a/third_party/stdlib/ctypes/macholib/README.ctypes b/third_party/stdlib/ctypes/macholib/README.ctypes new file mode 100644 index 00000000..2866e9f3 --- /dev/null +++ b/third_party/stdlib/ctypes/macholib/README.ctypes @@ -0,0 +1,7 @@ +Files in this directory come from Bob Ippolito's py2app. + +License: Any components of the py2app suite may be distributed under +the MIT or PSF open source licenses. + +This is version 1.0, SVN revision 789, from 2006/01/25. +The main repository is http://svn.red-bean.com/bob/macholib/trunk/macholib/ \ No newline at end of file diff --git a/third_party/stdlib/ctypes/macholib/__init__.py b/third_party/stdlib/ctypes/macholib/__init__.py new file mode 100644 index 00000000..5621defc --- /dev/null +++ b/third_party/stdlib/ctypes/macholib/__init__.py @@ -0,0 +1,9 @@ +""" +Enough Mach-O to make your head spin. + +See the relevant header files in /usr/include/mach-o + +And also Apple's documentation. +""" + +__version__ = '1.0' diff --git a/third_party/stdlib/ctypes/macholib/dyld.py b/third_party/stdlib/ctypes/macholib/dyld.py new file mode 100644 index 00000000..1fdf8d64 --- /dev/null +++ b/third_party/stdlib/ctypes/macholib/dyld.py @@ -0,0 +1,166 @@ +""" +dyld emulation +""" + +import os +from framework import framework_info +from dylib import dylib_info +from itertools import * + +__all__ = [ + 'dyld_find', 'framework_find', + 'framework_info', 'dylib_info', +] + +# These are the defaults as per man dyld(1) +# +DEFAULT_FRAMEWORK_FALLBACK = [ + os.path.expanduser("~/Library/Frameworks"), + "/Library/Frameworks", + "/Network/Library/Frameworks", + "/System/Library/Frameworks", +] + +DEFAULT_LIBRARY_FALLBACK = [ + os.path.expanduser("~/lib"), + "/usr/local/lib", + "/lib", + "/usr/lib", +] + +def ensure_utf8(s): + """Not all of PyObjC and Python understand unicode paths very well yet""" + if isinstance(s, unicode): + return s.encode('utf8') + return s + +def dyld_env(env, var): + if env is None: + env = os.environ + rval = env.get(var) + if rval is None: + return [] + return rval.split(':') + +def dyld_image_suffix(env=None): + if env is None: + env = os.environ + return env.get('DYLD_IMAGE_SUFFIX') + +def dyld_framework_path(env=None): + return dyld_env(env, 'DYLD_FRAMEWORK_PATH') + +def dyld_library_path(env=None): + return dyld_env(env, 'DYLD_LIBRARY_PATH') + +def dyld_fallback_framework_path(env=None): + return dyld_env(env, 'DYLD_FALLBACK_FRAMEWORK_PATH') + +def dyld_fallback_library_path(env=None): + return dyld_env(env, 'DYLD_FALLBACK_LIBRARY_PATH') + +def dyld_image_suffix_search(iterator, env=None): + """For a potential path iterator, add DYLD_IMAGE_SUFFIX semantics""" + suffix = dyld_image_suffix(env) + if suffix is None: + return iterator + def _inject(iterator=iterator, suffix=suffix): + for path in iterator: + if path.endswith('.dylib'): + yield path[:-len('.dylib')] + suffix + '.dylib' + else: + yield path + suffix + yield path + return _inject() + +def dyld_override_search(name, env=None): + # If DYLD_FRAMEWORK_PATH is set and this dylib_name is a + # framework name, use the first file that exists in the framework + # path if any. If there is none go on to search the DYLD_LIBRARY_PATH + # if any. + + framework = framework_info(name) + + if framework is not None: + for path in dyld_framework_path(env): + yield os.path.join(path, framework['name']) + + # If DYLD_LIBRARY_PATH is set then use the first file that exists + # in the path. If none use the original name. + for path in dyld_library_path(env): + yield os.path.join(path, os.path.basename(name)) + +def dyld_executable_path_search(name, executable_path=None): + # If we haven't done any searching and found a library and the + # dylib_name starts with "@executable_path/" then construct the + # library name. + if name.startswith('@executable_path/') and executable_path is not None: + yield os.path.join(executable_path, name[len('@executable_path/'):]) + +def dyld_default_search(name, env=None): + yield name + + framework = framework_info(name) + + if framework is not None: + fallback_framework_path = dyld_fallback_framework_path(env) + for path in fallback_framework_path: + yield os.path.join(path, framework['name']) + + fallback_library_path = dyld_fallback_library_path(env) + for path in fallback_library_path: + yield os.path.join(path, os.path.basename(name)) + + if framework is not None and not fallback_framework_path: + for path in DEFAULT_FRAMEWORK_FALLBACK: + yield os.path.join(path, framework['name']) + + if not fallback_library_path: + for path in DEFAULT_LIBRARY_FALLBACK: + yield os.path.join(path, os.path.basename(name)) + +def dyld_find(name, executable_path=None, env=None): + """ + Find a library or framework using dyld semantics + """ + name = ensure_utf8(name) + executable_path = ensure_utf8(executable_path) + for path in dyld_image_suffix_search(chain( + dyld_override_search(name, env), + dyld_executable_path_search(name, executable_path), + dyld_default_search(name, env), + ), env): + if os.path.isfile(path): + return path + raise ValueError("dylib %s could not be found" % (name,)) + +def framework_find(fn, executable_path=None, env=None): + """ + Find a framework using dyld semantics in a very loose manner. + + Will take input such as: + Python + Python.framework + Python.framework/Versions/Current + """ + try: + return dyld_find(fn, executable_path=executable_path, env=env) + except ValueError, e: + pass + fmwk_index = fn.rfind('.framework') + if fmwk_index == -1: + fmwk_index = len(fn) + fn += '.framework' + fn = os.path.join(fn, os.path.basename(fn[:fmwk_index])) + try: + return dyld_find(fn, executable_path=executable_path, env=env) + except ValueError: + raise e + +def test_dyld_find(): + env = {} + assert dyld_find('libSystem.dylib') == '/usr/lib/libSystem.dylib' + assert dyld_find('System.framework/System') == '/System/Library/Frameworks/System.framework/System' + +if __name__ == '__main__': + test_dyld_find() diff --git a/third_party/stdlib/ctypes/macholib/dylib.py b/third_party/stdlib/ctypes/macholib/dylib.py new file mode 100644 index 00000000..aa107507 --- /dev/null +++ b/third_party/stdlib/ctypes/macholib/dylib.py @@ -0,0 +1,63 @@ +""" +Generic dylib path manipulation +""" + +import re + +__all__ = ['dylib_info'] + +DYLIB_RE = re.compile(r"""(?x) +(?P^.*)(?:^|/) +(?P + (?P\w+?) + (?:\.(?P[^._]+))? + (?:_(?P[^._]+))? + \.dylib$ +) +""") + +def dylib_info(filename): + """ + A dylib name can take one of the following four forms: + Location/Name.SomeVersion_Suffix.dylib + Location/Name.SomeVersion.dylib + Location/Name_Suffix.dylib + Location/Name.dylib + + returns None if not found or a mapping equivalent to: + dict( + location='Location', + name='Name.SomeVersion_Suffix.dylib', + shortname='Name', + version='SomeVersion', + suffix='Suffix', + ) + + Note that SomeVersion and Suffix are optional and may be None + if not present. + """ + is_dylib = DYLIB_RE.match(filename) + if not is_dylib: + return None + return is_dylib.groupdict() + + +def test_dylib_info(): + def d(location=None, name=None, shortname=None, version=None, suffix=None): + return dict( + location=location, + name=name, + shortname=shortname, + version=version, + suffix=suffix + ) + assert dylib_info('completely/invalid') is None + assert dylib_info('completely/invalide_debug') is None + assert dylib_info('P/Foo.dylib') == d('P', 'Foo.dylib', 'Foo') + assert dylib_info('P/Foo_debug.dylib') == d('P', 'Foo_debug.dylib', 'Foo', suffix='debug') + assert dylib_info('P/Foo.A.dylib') == d('P', 'Foo.A.dylib', 'Foo', 'A') + assert dylib_info('P/Foo_debug.A.dylib') == d('P', 'Foo_debug.A.dylib', 'Foo_debug', 'A') + assert dylib_info('P/Foo.A_debug.dylib') == d('P', 'Foo.A_debug.dylib', 'Foo', 'A', 'debug') + +if __name__ == '__main__': + test_dylib_info() diff --git a/third_party/stdlib/ctypes/macholib/fetch_macholib b/third_party/stdlib/ctypes/macholib/fetch_macholib new file mode 100755 index 00000000..e6d6a226 --- /dev/null +++ b/third_party/stdlib/ctypes/macholib/fetch_macholib @@ -0,0 +1,2 @@ +#!/bin/sh +svn export --force http://svn.red-bean.com/bob/macholib/trunk/macholib/ . diff --git a/third_party/stdlib/ctypes/macholib/fetch_macholib.bat b/third_party/stdlib/ctypes/macholib/fetch_macholib.bat new file mode 100644 index 00000000..f474d5cd --- /dev/null +++ b/third_party/stdlib/ctypes/macholib/fetch_macholib.bat @@ -0,0 +1 @@ +svn export --force http://svn.red-bean.com/bob/macholib/trunk/macholib/ . diff --git a/third_party/stdlib/ctypes/macholib/framework.py b/third_party/stdlib/ctypes/macholib/framework.py new file mode 100644 index 00000000..ad6ed554 --- /dev/null +++ b/third_party/stdlib/ctypes/macholib/framework.py @@ -0,0 +1,65 @@ +""" +Generic framework path manipulation +""" + +import re + +__all__ = ['framework_info'] + +STRICT_FRAMEWORK_RE = re.compile(r"""(?x) +(?P^.*)(?:^|/) +(?P + (?P\w+).framework/ + (?:Versions/(?P[^/]+)/)? + (?P=shortname) + (?:_(?P[^_]+))? +)$ +""") + +def framework_info(filename): + """ + A framework name can take one of the following four forms: + Location/Name.framework/Versions/SomeVersion/Name_Suffix + Location/Name.framework/Versions/SomeVersion/Name + Location/Name.framework/Name_Suffix + Location/Name.framework/Name + + returns None if not found, or a mapping equivalent to: + dict( + location='Location', + name='Name.framework/Versions/SomeVersion/Name_Suffix', + shortname='Name', + version='SomeVersion', + suffix='Suffix', + ) + + Note that SomeVersion and Suffix are optional and may be None + if not present + """ + is_framework = STRICT_FRAMEWORK_RE.match(filename) + if not is_framework: + return None + return is_framework.groupdict() + +def test_framework_info(): + def d(location=None, name=None, shortname=None, version=None, suffix=None): + return dict( + location=location, + name=name, + shortname=shortname, + version=version, + suffix=suffix + ) + assert framework_info('completely/invalid') is None + assert framework_info('completely/invalid/_debug') is None + assert framework_info('P/F.framework') is None + assert framework_info('P/F.framework/_debug') is None + assert framework_info('P/F.framework/F') == d('P', 'F.framework/F', 'F') + assert framework_info('P/F.framework/F_debug') == d('P', 'F.framework/F_debug', 'F', suffix='debug') + assert framework_info('P/F.framework/Versions') is None + assert framework_info('P/F.framework/Versions/A') is None + assert framework_info('P/F.framework/Versions/A/F') == d('P', 'F.framework/Versions/A/F', 'F', 'A') + assert framework_info('P/F.framework/Versions/A/F_debug') == d('P', 'F.framework/Versions/A/F_debug', 'F', 'A', 'debug') + +if __name__ == '__main__': + test_framework_info() diff --git a/third_party/stdlib/ctypes/test/__init__.py b/third_party/stdlib/ctypes/test/__init__.py new file mode 100644 index 00000000..f2cc1435 --- /dev/null +++ b/third_party/stdlib/ctypes/test/__init__.py @@ -0,0 +1,216 @@ +import os, sys, unittest, getopt, time + +use_resources = [] + +import ctypes +ctypes_symbols = dir(ctypes) + +def need_symbol(name): + return unittest.skipUnless(name in ctypes_symbols, + '{!r} is required'.format(name)) + + +class ResourceDenied(unittest.SkipTest): + """Test skipped because it requested a disallowed resource. + + This is raised when a test calls requires() for a resource that + has not be enabled. Resources are defined by test modules. + """ + +def is_resource_enabled(resource): + """Test whether a resource is enabled. + + If the caller's module is __main__ then automatically return True.""" + if sys._getframe().f_back.f_globals.get("__name__") == "__main__": + return True + result = use_resources is not None and \ + (resource in use_resources or "*" in use_resources) + if not result: + _unavail[resource] = None + return result + +_unavail = {} +def requires(resource, msg=None): + """Raise ResourceDenied if the specified resource is not available. + + If the caller's module is __main__ then automatically return True.""" + # see if the caller's module is __main__ - if so, treat as if + # the resource was set + if sys._getframe().f_back.f_globals.get("__name__") == "__main__": + return + if not is_resource_enabled(resource): + if msg is None: + msg = "Use of the `%s' resource not enabled" % resource + raise ResourceDenied(msg) + +def find_package_modules(package, mask): + import fnmatch + if (hasattr(package, "__loader__") and + hasattr(package.__loader__, '_files')): + path = package.__name__.replace(".", os.path.sep) + mask = os.path.join(path, mask) + for fnm in package.__loader__._files.iterkeys(): + if fnmatch.fnmatchcase(fnm, mask): + yield os.path.splitext(fnm)[0].replace(os.path.sep, ".") + else: + path = package.__path__[0] + for fnm in os.listdir(path): + if fnmatch.fnmatchcase(fnm, mask): + yield "%s.%s" % (package.__name__, os.path.splitext(fnm)[0]) + +def get_tests(package, mask, verbosity, exclude=()): + """Return a list of skipped test modules, and a list of test cases.""" + tests = [] + skipped = [] + for modname in find_package_modules(package, mask): + if modname.split(".")[-1] in exclude: + skipped.append(modname) + if verbosity > 1: + print >> sys.stderr, "Skipped %s: excluded" % modname + continue + try: + mod = __import__(modname, globals(), locals(), ['*']) + except (ResourceDenied, unittest.SkipTest) as detail: + skipped.append(modname) + if verbosity > 1: + print >> sys.stderr, "Skipped %s: %s" % (modname, detail) + continue + for name in dir(mod): + if name.startswith("_"): + continue + o = getattr(mod, name) + if type(o) is type(unittest.TestCase) and issubclass(o, unittest.TestCase): + tests.append(o) + return skipped, tests + +def usage(): + print __doc__ + return 1 + +def test_with_refcounts(runner, verbosity, testcase): + """Run testcase several times, tracking reference counts.""" + import gc + import ctypes + ptc = ctypes._pointer_type_cache.copy() + cfc = ctypes._c_functype_cache.copy() + wfc = ctypes._win_functype_cache.copy() + + # when searching for refcount leaks, we have to manually reset any + # caches that ctypes has. + def cleanup(): + ctypes._pointer_type_cache = ptc.copy() + ctypes._c_functype_cache = cfc.copy() + ctypes._win_functype_cache = wfc.copy() + gc.collect() + + test = unittest.makeSuite(testcase) + for i in range(5): + rc = sys.gettotalrefcount() + runner.run(test) + cleanup() + COUNT = 5 + refcounts = [None] * COUNT + for i in range(COUNT): + rc = sys.gettotalrefcount() + runner.run(test) + cleanup() + refcounts[i] = sys.gettotalrefcount() - rc + if filter(None, refcounts): + print "%s leaks:\n\t" % testcase, refcounts + elif verbosity: + print "%s: ok." % testcase + +class TestRunner(unittest.TextTestRunner): + def run(self, test, skipped): + "Run the given test case or test suite." + # Same as unittest.TextTestRunner.run, except that it reports + # skipped tests. + result = self._makeResult() + startTime = time.time() + test(result) + stopTime = time.time() + timeTaken = stopTime - startTime + result.printErrors() + self.stream.writeln(result.separator2) + run = result.testsRun + if _unavail: #skipped: + requested = _unavail.keys() + requested.sort() + self.stream.writeln("Ran %d test%s in %.3fs (%s module%s skipped)" % + (run, run != 1 and "s" or "", timeTaken, + len(skipped), + len(skipped) != 1 and "s" or "")) + self.stream.writeln("Unavailable resources: %s" % ", ".join(requested)) + else: + self.stream.writeln("Ran %d test%s in %.3fs" % + (run, run != 1 and "s" or "", timeTaken)) + self.stream.writeln() + if not result.wasSuccessful(): + self.stream.write("FAILED (") + failed, errored = map(len, (result.failures, result.errors)) + if failed: + self.stream.write("failures=%d" % failed) + if errored: + if failed: self.stream.write(", ") + self.stream.write("errors=%d" % errored) + self.stream.writeln(")") + else: + self.stream.writeln("OK") + return result + + +def main(*packages): + try: + opts, args = getopt.getopt(sys.argv[1:], "rqvu:x:") + except getopt.error: + return usage() + + verbosity = 1 + search_leaks = False + exclude = [] + for flag, value in opts: + if flag == "-q": + verbosity -= 1 + elif flag == "-v": + verbosity += 1 + elif flag == "-r": + try: + sys.gettotalrefcount + except AttributeError: + print >> sys.stderr, "-r flag requires Python debug build" + return -1 + search_leaks = True + elif flag == "-u": + use_resources.extend(value.split(",")) + elif flag == "-x": + exclude.extend(value.split(",")) + + mask = "test_*.py" + if args: + mask = args[0] + + for package in packages: + run_tests(package, mask, verbosity, search_leaks, exclude) + + +def run_tests(package, mask, verbosity, search_leaks, exclude): + skipped, testcases = get_tests(package, mask, verbosity, exclude) + runner = TestRunner(verbosity=verbosity) + + suites = [unittest.makeSuite(o) for o in testcases] + suite = unittest.TestSuite(suites) + result = runner.run(suite, skipped) + + if search_leaks: + # hunt for refcount leaks + runner = BasicTestRunner() + for t in testcases: + test_with_refcounts(runner, verbosity, t) + + return bool(result.errors) + +class BasicTestRunner: + def run(self, test): + result = unittest.TestResult() + test(result) + return result diff --git a/third_party/stdlib/ctypes/test/runtests.py b/third_party/stdlib/ctypes/test/runtests.py new file mode 100644 index 00000000..b7a2b264 --- /dev/null +++ b/third_party/stdlib/ctypes/test/runtests.py @@ -0,0 +1,19 @@ +"""Usage: runtests.py [-q] [-r] [-v] [-u resources] [mask] + +Run all tests found in this directory, and print a summary of the results. +Command line flags: + -q quiet mode: don't print anything while the tests are running + -r run tests repeatedly, look for refcount leaks + -u + Add resources to the lits of allowed resources. '*' allows all + resources. + -v verbose mode: print the test currently executed + -x + Exclude specified tests. + mask mask to select filenames containing testcases, wildcards allowed +""" +import sys +import ctypes.test + +if __name__ == "__main__": + sys.exit(ctypes.test.main(ctypes.test)) diff --git a/third_party/stdlib/ctypes/test/test_anon.py b/third_party/stdlib/ctypes/test/test_anon.py new file mode 100644 index 00000000..d892b598 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_anon.py @@ -0,0 +1,60 @@ +import unittest +from ctypes import * + +class AnonTest(unittest.TestCase): + + def test_anon(self): + class ANON(Union): + _fields_ = [("a", c_int), + ("b", c_int)] + + class Y(Structure): + _fields_ = [("x", c_int), + ("_", ANON), + ("y", c_int)] + _anonymous_ = ["_"] + + self.assertEqual(Y.a.offset, sizeof(c_int)) + self.assertEqual(Y.b.offset, sizeof(c_int)) + + self.assertEqual(ANON.a.offset, 0) + self.assertEqual(ANON.b.offset, 0) + + def test_anon_nonseq(self): + # TypeError: _anonymous_ must be a sequence + self.assertRaises(TypeError, + lambda: type(Structure)("Name", + (Structure,), + {"_fields_": [], "_anonymous_": 42})) + + def test_anon_nonmember(self): + # AttributeError: type object 'Name' has no attribute 'x' + self.assertRaises(AttributeError, + lambda: type(Structure)("Name", + (Structure,), + {"_fields_": [], + "_anonymous_": ["x"]})) + + def test_nested(self): + class ANON_S(Structure): + _fields_ = [("a", c_int)] + + class ANON_U(Union): + _fields_ = [("_", ANON_S), + ("b", c_int)] + _anonymous_ = ["_"] + + class Y(Structure): + _fields_ = [("x", c_int), + ("_", ANON_U), + ("y", c_int)] + _anonymous_ = ["_"] + + self.assertEqual(Y.x.offset, 0) + self.assertEqual(Y.a.offset, sizeof(c_int)) + self.assertEqual(Y.b.offset, sizeof(c_int)) + self.assertEqual(Y._.offset, sizeof(c_int)) + self.assertEqual(Y.y.offset, sizeof(c_int) * 2) + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_array_in_pointer.py b/third_party/stdlib/ctypes/test/test_array_in_pointer.py new file mode 100644 index 00000000..ee7863c5 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_array_in_pointer.py @@ -0,0 +1,64 @@ +import unittest +from ctypes import * +from binascii import hexlify +import re + +def dump(obj): + # helper function to dump memory contents in hex, with a hyphen + # between the bytes. + h = hexlify(memoryview(obj)) + return re.sub(r"(..)", r"\1-", h)[:-1] + + +class Value(Structure): + _fields_ = [("val", c_byte)] + +class Container(Structure): + _fields_ = [("pvalues", POINTER(Value))] + +class Test(unittest.TestCase): + def test(self): + # create an array of 4 values + val_array = (Value * 4)() + + # create a container, which holds a pointer to the pvalues array. + c = Container() + c.pvalues = val_array + + # memory contains 4 NUL bytes now, that's correct + self.assertEqual("00-00-00-00", dump(val_array)) + + # set the values of the array through the pointer: + for i in range(4): + c.pvalues[i].val = i + 1 + + values = [c.pvalues[i].val for i in range(4)] + + # These are the expected results: here s the bug! + self.assertEqual( + (values, dump(val_array)), + ([1, 2, 3, 4], "01-02-03-04") + ) + + def test_2(self): + + val_array = (Value * 4)() + + # memory contains 4 NUL bytes now, that's correct + self.assertEqual("00-00-00-00", dump(val_array)) + + ptr = cast(val_array, POINTER(Value)) + # set the values of the array through the pointer: + for i in range(4): + ptr[i].val = i + 1 + + values = [ptr[i].val for i in range(4)] + + # These are the expected results: here s the bug! + self.assertEqual( + (values, dump(val_array)), + ([1, 2, 3, 4], "01-02-03-04") + ) + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_arrays.py b/third_party/stdlib/ctypes/test/test_arrays.py new file mode 100644 index 00000000..49aaab52 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_arrays.py @@ -0,0 +1,136 @@ +import unittest +from ctypes import * + +from ctypes.test import need_symbol + +formats = "bBhHiIlLqQfd" + +formats = c_byte, c_ubyte, c_short, c_ushort, c_int, c_uint, \ + c_long, c_ulonglong, c_float, c_double, c_longdouble + +class ArrayTestCase(unittest.TestCase): + def test_simple(self): + # create classes holding simple numeric types, and check + # various properties. + + init = range(15, 25) + + for fmt in formats: + alen = len(init) + int_array = ARRAY(fmt, alen) + + ia = int_array(*init) + # length of instance ok? + self.assertEqual(len(ia), alen) + + # slot values ok? + values = [ia[i] for i in range(alen)] + self.assertEqual(values, init) + + # out-of-bounds accesses should be caught + with self.assertRaises(IndexError): ia[alen] + with self.assertRaises(IndexError): ia[-alen-1] + + # change the items + from operator import setitem + new_values = range(42, 42+alen) + [setitem(ia, n, new_values[n]) for n in range(alen)] + values = [ia[i] for i in range(alen)] + self.assertEqual(values, new_values) + + # are the items initialized to 0? + ia = int_array() + values = [ia[i] for i in range(alen)] + self.assertEqual(values, [0] * alen) + + # Too many initializers should be caught + self.assertRaises(IndexError, int_array, *range(alen*2)) + + CharArray = ARRAY(c_char, 3) + + ca = CharArray("a", "b", "c") + + # Should this work? It doesn't: + # CharArray("abc") + self.assertRaises(TypeError, CharArray, "abc") + + self.assertEqual(ca[0], "a") + self.assertEqual(ca[1], "b") + self.assertEqual(ca[2], "c") + self.assertEqual(ca[-3], "a") + self.assertEqual(ca[-2], "b") + self.assertEqual(ca[-1], "c") + + self.assertEqual(len(ca), 3) + + # slicing is now supported, but not extended slicing (3-argument)! + from operator import getslice, delitem + self.assertRaises(TypeError, getslice, ca, 0, 1, -1) + + # cannot delete items + self.assertRaises(TypeError, delitem, ca, 0) + + def test_numeric_arrays(self): + + alen = 5 + + numarray = ARRAY(c_int, alen) + + na = numarray() + values = [na[i] for i in range(alen)] + self.assertEqual(values, [0] * alen) + + na = numarray(*[c_int()] * alen) + values = [na[i] for i in range(alen)] + self.assertEqual(values, [0]*alen) + + na = numarray(1, 2, 3, 4, 5) + values = [i for i in na] + self.assertEqual(values, [1, 2, 3, 4, 5]) + + na = numarray(*map(c_int, (1, 2, 3, 4, 5))) + values = [i for i in na] + self.assertEqual(values, [1, 2, 3, 4, 5]) + + def test_classcache(self): + self.assertIsNot(ARRAY(c_int, 3), ARRAY(c_int, 4)) + self.assertIs(ARRAY(c_int, 3), ARRAY(c_int, 3)) + + def test_from_address(self): + # Failed with 0.9.8, reported by JUrner + p = create_string_buffer("foo") + sz = (c_char * 3).from_address(addressof(p)) + self.assertEqual(sz[:], "foo") + self.assertEqual(sz[::], "foo") + self.assertEqual(sz[::-1], "oof") + self.assertEqual(sz[::3], "f") + self.assertEqual(sz[1:4:2], "o") + self.assertEqual(sz.value, "foo") + + @need_symbol('create_unicode_buffer') + def test_from_addressW(self): + p = create_unicode_buffer("foo") + sz = (c_wchar * 3).from_address(addressof(p)) + self.assertEqual(sz[:], "foo") + self.assertEqual(sz[::], "foo") + self.assertEqual(sz[::-1], "oof") + self.assertEqual(sz[::3], "f") + self.assertEqual(sz[1:4:2], "o") + self.assertEqual(sz.value, "foo") + + def test_cache(self): + # Array types are cached internally in the _ctypes extension, + # in a WeakValueDictionary. Make sure the array type is + # removed from the cache when the itemtype goes away. This + # test will not fail, but will show a leak in the testsuite. + + # Create a new type: + class my_int(c_int): + pass + # Create a new array type based on it: + t1 = my_int * 1 + t2 = my_int * 1 + self.assertIs(t1, t2) + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_as_parameter.py b/third_party/stdlib/ctypes/test/test_as_parameter.py new file mode 100644 index 00000000..f2fe10a9 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_as_parameter.py @@ -0,0 +1,226 @@ +import unittest +from ctypes import * +from ctypes.test import need_symbol +import _ctypes_test + +dll = CDLL(_ctypes_test.__file__) + +try: + CALLBACK_FUNCTYPE = WINFUNCTYPE +except NameError: + # fake to enable this test on Linux + CALLBACK_FUNCTYPE = CFUNCTYPE + +class POINT(Structure): + _fields_ = [("x", c_int), ("y", c_int)] + +class BasicWrapTestCase(unittest.TestCase): + def wrap(self, param): + return param + + @need_symbol('c_wchar') + def test_wchar_parm(self): + f = dll._testfunc_i_bhilfd + f.argtypes = [c_byte, c_wchar, c_int, c_long, c_float, c_double] + result = f(self.wrap(1), self.wrap(u"x"), self.wrap(3), self.wrap(4), self.wrap(5.0), self.wrap(6.0)) + self.assertEqual(result, 139) + self.assertTrue(type(result), int) + + def test_pointers(self): + f = dll._testfunc_p_p + f.restype = POINTER(c_int) + f.argtypes = [POINTER(c_int)] + + # This only works if the value c_int(42) passed to the + # function is still alive while the pointer (the result) is + # used. + + v = c_int(42) + + self.assertEqual(pointer(v).contents.value, 42) + result = f(self.wrap(pointer(v))) + self.assertEqual(type(result), POINTER(c_int)) + self.assertEqual(result.contents.value, 42) + + # This on works... + result = f(self.wrap(pointer(v))) + self.assertEqual(result.contents.value, v.value) + + p = pointer(c_int(99)) + result = f(self.wrap(p)) + self.assertEqual(result.contents.value, 99) + + def test_shorts(self): + f = dll._testfunc_callback_i_if + + args = [] + expected = [262144, 131072, 65536, 32768, 16384, 8192, 4096, 2048, + 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1] + + def callback(v): + args.append(v) + return v + + CallBack = CFUNCTYPE(c_int, c_int) + + cb = CallBack(callback) + f(self.wrap(2**18), self.wrap(cb)) + self.assertEqual(args, expected) + + ################################################################ + + def test_callbacks(self): + f = dll._testfunc_callback_i_if + f.restype = c_int + f.argtypes = None + + MyCallback = CFUNCTYPE(c_int, c_int) + + def callback(value): + #print "called back with", value + return value + + cb = MyCallback(callback) + + result = f(self.wrap(-10), self.wrap(cb)) + self.assertEqual(result, -18) + + # test with prototype + f.argtypes = [c_int, MyCallback] + cb = MyCallback(callback) + + result = f(self.wrap(-10), self.wrap(cb)) + self.assertEqual(result, -18) + + result = f(self.wrap(-10), self.wrap(cb)) + self.assertEqual(result, -18) + + AnotherCallback = CALLBACK_FUNCTYPE(c_int, c_int, c_int, c_int, c_int) + + # check that the prototype works: we call f with wrong + # argument types + cb = AnotherCallback(callback) + self.assertRaises(ArgumentError, f, self.wrap(-10), self.wrap(cb)) + + def test_callbacks_2(self): + # Can also use simple datatypes as argument type specifiers + # for the callback function. + # In this case the call receives an instance of that type + f = dll._testfunc_callback_i_if + f.restype = c_int + + MyCallback = CFUNCTYPE(c_int, c_int) + + f.argtypes = [c_int, MyCallback] + + def callback(value): + #print "called back with", value + self.assertEqual(type(value), int) + return value + + cb = MyCallback(callback) + result = f(self.wrap(-10), self.wrap(cb)) + self.assertEqual(result, -18) + + def test_longlong_callbacks(self): + + f = dll._testfunc_callback_q_qf + f.restype = c_longlong + + MyCallback = CFUNCTYPE(c_longlong, c_longlong) + + f.argtypes = [c_longlong, MyCallback] + + def callback(value): + self.assertIsInstance(value, (int, long)) + return value & 0x7FFFFFFF + + cb = MyCallback(callback) + + self.assertEqual(13577625587, int(f(self.wrap(1000000000000), self.wrap(cb)))) + + def test_byval(self): + # without prototype + ptin = POINT(1, 2) + ptout = POINT() + # EXPORT int _testfunc_byval(point in, point *pout) + result = dll._testfunc_byval(ptin, byref(ptout)) + got = result, ptout.x, ptout.y + expected = 3, 1, 2 + self.assertEqual(got, expected) + + # with prototype + ptin = POINT(101, 102) + ptout = POINT() + dll._testfunc_byval.argtypes = (POINT, POINTER(POINT)) + dll._testfunc_byval.restype = c_int + result = dll._testfunc_byval(self.wrap(ptin), byref(ptout)) + got = result, ptout.x, ptout.y + expected = 203, 101, 102 + self.assertEqual(got, expected) + + def test_struct_return_2H(self): + class S2H(Structure): + _fields_ = [("x", c_short), + ("y", c_short)] + dll.ret_2h_func.restype = S2H + dll.ret_2h_func.argtypes = [S2H] + inp = S2H(99, 88) + s2h = dll.ret_2h_func(self.wrap(inp)) + self.assertEqual((s2h.x, s2h.y), (99*2, 88*3)) + + def test_struct_return_8H(self): + class S8I(Structure): + _fields_ = [("a", c_int), + ("b", c_int), + ("c", c_int), + ("d", c_int), + ("e", c_int), + ("f", c_int), + ("g", c_int), + ("h", c_int)] + dll.ret_8i_func.restype = S8I + dll.ret_8i_func.argtypes = [S8I] + inp = S8I(9, 8, 7, 6, 5, 4, 3, 2) + s8i = dll.ret_8i_func(self.wrap(inp)) + self.assertEqual((s8i.a, s8i.b, s8i.c, s8i.d, s8i.e, s8i.f, s8i.g, s8i.h), + (9*2, 8*3, 7*4, 6*5, 5*6, 4*7, 3*8, 2*9)) + + def test_recursive_as_param(self): + from ctypes import c_int + + class A(object): + pass + + a = A() + a._as_parameter_ = a + with self.assertRaises(RuntimeError): + c_int.from_param(a) + + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +class AsParamWrapper(object): + def __init__(self, param): + self._as_parameter_ = param + +class AsParamWrapperTestCase(BasicWrapTestCase): + wrap = AsParamWrapper + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +class AsParamPropertyWrapper(object): + def __init__(self, param): + self._param = param + + def getParameter(self): + return self._param + _as_parameter_ = property(getParameter) + +class AsParamPropertyWrapperTestCase(BasicWrapTestCase): + wrap = AsParamPropertyWrapper + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_bitfields.py b/third_party/stdlib/ctypes/test/test_bitfields.py new file mode 100644 index 00000000..a854d2b7 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_bitfields.py @@ -0,0 +1,291 @@ +from ctypes import * +from ctypes.test import need_symbol +import unittest +import os + +import ctypes +import _ctypes_test + +class BITS(Structure): + _fields_ = [("A", c_int, 1), + ("B", c_int, 2), + ("C", c_int, 3), + ("D", c_int, 4), + ("E", c_int, 5), + ("F", c_int, 6), + ("G", c_int, 7), + ("H", c_int, 8), + ("I", c_int, 9), + + ("M", c_short, 1), + ("N", c_short, 2), + ("O", c_short, 3), + ("P", c_short, 4), + ("Q", c_short, 5), + ("R", c_short, 6), + ("S", c_short, 7)] + +func = CDLL(_ctypes_test.__file__).unpack_bitfields +func.argtypes = POINTER(BITS), c_char + +##for n in "ABCDEFGHIMNOPQRS": +## print n, hex(getattr(BITS, n).size), getattr(BITS, n).offset + +class C_Test(unittest.TestCase): + + def test_ints(self): + for i in range(512): + for name in "ABCDEFGHI": + b = BITS() + setattr(b, name, i) + self.assertEqual((name, i, getattr(b, name)), (name, i, func(byref(b), name))) + + def test_shorts(self): + for i in range(256): + for name in "MNOPQRS": + b = BITS() + setattr(b, name, i) + self.assertEqual((name, i, getattr(b, name)), (name, i, func(byref(b), name))) + +signed_int_types = (c_byte, c_short, c_int, c_long, c_longlong) +unsigned_int_types = (c_ubyte, c_ushort, c_uint, c_ulong, c_ulonglong) +int_types = unsigned_int_types + signed_int_types + +class BitFieldTest(unittest.TestCase): + + def test_longlong(self): + class X(Structure): + _fields_ = [("a", c_longlong, 1), + ("b", c_longlong, 62), + ("c", c_longlong, 1)] + + self.assertEqual(sizeof(X), sizeof(c_longlong)) + x = X() + x.a, x.b, x.c = -1, 7, -1 + self.assertEqual((x.a, x.b, x.c), (-1, 7, -1)) + + def test_ulonglong(self): + class X(Structure): + _fields_ = [("a", c_ulonglong, 1), + ("b", c_ulonglong, 62), + ("c", c_ulonglong, 1)] + + self.assertEqual(sizeof(X), sizeof(c_longlong)) + x = X() + self.assertEqual((x.a, x.b, x.c), (0, 0, 0)) + x.a, x.b, x.c = 7, 7, 7 + self.assertEqual((x.a, x.b, x.c), (1, 7, 1)) + + def test_signed(self): + for c_typ in signed_int_types: + class X(Structure): + _fields_ = [("dummy", c_typ), + ("a", c_typ, 3), + ("b", c_typ, 3), + ("c", c_typ, 1)] + self.assertEqual(sizeof(X), sizeof(c_typ)*2) + + x = X() + self.assertEqual((c_typ, x.a, x.b, x.c), (c_typ, 0, 0, 0)) + x.a = -1 + self.assertEqual((c_typ, x.a, x.b, x.c), (c_typ, -1, 0, 0)) + x.a, x.b = 0, -1 + self.assertEqual((c_typ, x.a, x.b, x.c), (c_typ, 0, -1, 0)) + + + def test_unsigned(self): + for c_typ in unsigned_int_types: + class X(Structure): + _fields_ = [("a", c_typ, 3), + ("b", c_typ, 3), + ("c", c_typ, 1)] + self.assertEqual(sizeof(X), sizeof(c_typ)) + + x = X() + self.assertEqual((c_typ, x.a, x.b, x.c), (c_typ, 0, 0, 0)) + x.a = -1 + self.assertEqual((c_typ, x.a, x.b, x.c), (c_typ, 7, 0, 0)) + x.a, x.b = 0, -1 + self.assertEqual((c_typ, x.a, x.b, x.c), (c_typ, 0, 7, 0)) + + + def fail_fields(self, *fields): + return self.get_except(type(Structure), "X", (), + {"_fields_": fields}) + + def test_nonint_types(self): + # bit fields are not allowed on non-integer types. + result = self.fail_fields(("a", c_char_p, 1)) + self.assertEqual(result, (TypeError, 'bit fields not allowed for type c_char_p')) + + result = self.fail_fields(("a", c_void_p, 1)) + self.assertEqual(result, (TypeError, 'bit fields not allowed for type c_void_p')) + + if c_int != c_long: + result = self.fail_fields(("a", POINTER(c_int), 1)) + self.assertEqual(result, (TypeError, 'bit fields not allowed for type LP_c_int')) + + result = self.fail_fields(("a", c_char, 1)) + self.assertEqual(result, (TypeError, 'bit fields not allowed for type c_char')) + + class Dummy(Structure): + _fields_ = [] + + result = self.fail_fields(("a", Dummy, 1)) + self.assertEqual(result, (TypeError, 'bit fields not allowed for type Dummy')) + + @need_symbol('c_wchar') + def test_c_wchar(self): + result = self.fail_fields(("a", c_wchar, 1)) + self.assertEqual(result, + (TypeError, 'bit fields not allowed for type c_wchar')) + + def test_single_bitfield_size(self): + for c_typ in int_types: + result = self.fail_fields(("a", c_typ, -1)) + self.assertEqual(result, (ValueError, 'number of bits invalid for bit field')) + + result = self.fail_fields(("a", c_typ, 0)) + self.assertEqual(result, (ValueError, 'number of bits invalid for bit field')) + + class X(Structure): + _fields_ = [("a", c_typ, 1)] + self.assertEqual(sizeof(X), sizeof(c_typ)) + + class X(Structure): + _fields_ = [("a", c_typ, sizeof(c_typ)*8)] + self.assertEqual(sizeof(X), sizeof(c_typ)) + + result = self.fail_fields(("a", c_typ, sizeof(c_typ)*8 + 1)) + self.assertEqual(result, (ValueError, 'number of bits invalid for bit field')) + + def test_multi_bitfields_size(self): + class X(Structure): + _fields_ = [("a", c_short, 1), + ("b", c_short, 14), + ("c", c_short, 1)] + self.assertEqual(sizeof(X), sizeof(c_short)) + + class X(Structure): + _fields_ = [("a", c_short, 1), + ("a1", c_short), + ("b", c_short, 14), + ("c", c_short, 1)] + self.assertEqual(sizeof(X), sizeof(c_short)*3) + self.assertEqual(X.a.offset, 0) + self.assertEqual(X.a1.offset, sizeof(c_short)) + self.assertEqual(X.b.offset, sizeof(c_short)*2) + self.assertEqual(X.c.offset, sizeof(c_short)*2) + + class X(Structure): + _fields_ = [("a", c_short, 3), + ("b", c_short, 14), + ("c", c_short, 14)] + self.assertEqual(sizeof(X), sizeof(c_short)*3) + self.assertEqual(X.a.offset, sizeof(c_short)*0) + self.assertEqual(X.b.offset, sizeof(c_short)*1) + self.assertEqual(X.c.offset, sizeof(c_short)*2) + + + def get_except(self, func, *args, **kw): + try: + func(*args, **kw) + except Exception, detail: + return detail.__class__, str(detail) + + def test_mixed_1(self): + class X(Structure): + _fields_ = [("a", c_byte, 4), + ("b", c_int, 4)] + if os.name in ("nt", "ce"): + self.assertEqual(sizeof(X), sizeof(c_int)*2) + else: + self.assertEqual(sizeof(X), sizeof(c_int)) + + def test_mixed_2(self): + class X(Structure): + _fields_ = [("a", c_byte, 4), + ("b", c_int, 32)] + self.assertEqual(sizeof(X), alignment(c_int)+sizeof(c_int)) + + def test_mixed_3(self): + class X(Structure): + _fields_ = [("a", c_byte, 4), + ("b", c_ubyte, 4)] + self.assertEqual(sizeof(X), sizeof(c_byte)) + + def test_mixed_4(self): + class X(Structure): + _fields_ = [("a", c_short, 4), + ("b", c_short, 4), + ("c", c_int, 24), + ("d", c_short, 4), + ("e", c_short, 4), + ("f", c_int, 24)] + # MSVC does NOT combine c_short and c_int into one field, GCC + # does (unless GCC is run with '-mms-bitfields' which + # produces code compatible with MSVC). + if os.name in ("nt", "ce"): + self.assertEqual(sizeof(X), sizeof(c_int) * 4) + else: + self.assertEqual(sizeof(X), sizeof(c_int) * 2) + + def test_anon_bitfields(self): + # anonymous bit-fields gave a strange error message + class X(Structure): + _fields_ = [("a", c_byte, 4), + ("b", c_ubyte, 4)] + class Y(Structure): + _anonymous_ = ["_"] + _fields_ = [("_", X)] + + @need_symbol('c_uint32') + def test_uint32(self): + class X(Structure): + _fields_ = [("a", c_uint32, 32)] + x = X() + x.a = 10 + self.assertEqual(x.a, 10) + x.a = 0xFDCBA987 + self.assertEqual(x.a, 0xFDCBA987) + + @need_symbol('c_uint64') + def test_uint64(self): + class X(Structure): + _fields_ = [("a", c_uint64, 64)] + x = X() + x.a = 10 + self.assertEqual(x.a, 10) + x.a = 0xFEDCBA9876543211 + self.assertEqual(x.a, 0xFEDCBA9876543211) + + @need_symbol('c_uint32') + def test_uint32_swap_little_endian(self): + # Issue #23319 + class Little(LittleEndianStructure): + _fields_ = [("a", c_uint32, 24), + ("b", c_uint32, 4), + ("c", c_uint32, 4)] + b = bytearray(4) + x = Little.from_buffer(b) + x.a = 0xabcdef + x.b = 1 + x.c = 2 + self.assertEqual(b, b'\xef\xcd\xab\x21') + + @need_symbol('c_uint32') + def test_uint32_swap_big_endian(self): + # Issue #23319 + class Big(BigEndianStructure): + _fields_ = [("a", c_uint32, 24), + ("b", c_uint32, 4), + ("c", c_uint32, 4)] + b = bytearray(4) + x = Big.from_buffer(b) + x.a = 0xabcdef + x.b = 1 + x.c = 2 + self.assertEqual(b, b'\xab\xcd\xef\x12') + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_buffers.py b/third_party/stdlib/ctypes/test/test_buffers.py new file mode 100644 index 00000000..88d87e98 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_buffers.py @@ -0,0 +1,72 @@ +from ctypes import * +from ctypes.test import need_symbol +import unittest + +class StringBufferTestCase(unittest.TestCase): + + def test_buffer(self): + b = create_string_buffer(32) + self.assertEqual(len(b), 32) + self.assertEqual(sizeof(b), 32 * sizeof(c_char)) + self.assertIs(type(b[0]), str) + + b = create_string_buffer("abc") + self.assertEqual(len(b), 4) # trailing nul char + self.assertEqual(sizeof(b), 4 * sizeof(c_char)) + self.assertIs(type(b[0]), str) + self.assertEqual(b[0], "a") + self.assertEqual(b[:], "abc\0") + self.assertEqual(b[::], "abc\0") + self.assertEqual(b[::-1], "\0cba") + self.assertEqual(b[::2], "ac") + self.assertEqual(b[::5], "a") + + def test_buffer_interface(self): + self.assertEqual(len(bytearray(create_string_buffer(0))), 0) + self.assertEqual(len(bytearray(create_string_buffer(1))), 1) + + def test_string_conversion(self): + b = create_string_buffer(u"abc") + self.assertEqual(len(b), 4) # trailing nul char + self.assertEqual(sizeof(b), 4 * sizeof(c_char)) + self.assertTrue(type(b[0]) is str) + self.assertEqual(b[0], "a") + self.assertEqual(b[:], "abc\0") + self.assertEqual(b[::], "abc\0") + self.assertEqual(b[::-1], "\0cba") + self.assertEqual(b[::2], "ac") + self.assertEqual(b[::5], "a") + + @need_symbol('c_wchar') + def test_unicode_buffer(self): + b = create_unicode_buffer(32) + self.assertEqual(len(b), 32) + self.assertEqual(sizeof(b), 32 * sizeof(c_wchar)) + self.assertIs(type(b[0]), unicode) + + b = create_unicode_buffer(u"abc") + self.assertEqual(len(b), 4) # trailing nul char + self.assertEqual(sizeof(b), 4 * sizeof(c_wchar)) + self.assertIs(type(b[0]), unicode) + self.assertEqual(b[0], u"a") + self.assertEqual(b[:], "abc\0") + self.assertEqual(b[::], "abc\0") + self.assertEqual(b[::-1], "\0cba") + self.assertEqual(b[::2], "ac") + self.assertEqual(b[::5], "a") + + @need_symbol('c_wchar') + def test_unicode_conversion(self): + b = create_unicode_buffer("abc") + self.assertEqual(len(b), 4) # trailing nul char + self.assertEqual(sizeof(b), 4 * sizeof(c_wchar)) + self.assertIs(type(b[0]), unicode) + self.assertEqual(b[0], u"a") + self.assertEqual(b[:], "abc\0") + self.assertEqual(b[::], "abc\0") + self.assertEqual(b[::-1], "\0cba") + self.assertEqual(b[::2], "ac") + self.assertEqual(b[::5], "a") + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_byteswap.py b/third_party/stdlib/ctypes/test/test_byteswap.py new file mode 100644 index 00000000..d36402b4 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_byteswap.py @@ -0,0 +1,295 @@ +import sys, unittest, struct, math, ctypes +from binascii import hexlify + +from ctypes import * + +def bin(s): + return hexlify(memoryview(s)).upper() + +# Each *simple* type that supports different byte orders has an +# __ctype_be__ attribute that specifies the same type in BIG ENDIAN +# byte order, and a __ctype_le__ attribute that is the same type in +# LITTLE ENDIAN byte order. +# +# For Structures and Unions, these types are created on demand. + +class Test(unittest.TestCase): + @unittest.skip('test disabled') + def test_X(self): + print >> sys.stderr, sys.byteorder + for i in range(32): + bits = BITS() + setattr(bits, "i%s" % i, 1) + dump(bits) + + def test_endian_short(self): + if sys.byteorder == "little": + self.assertIs(c_short.__ctype_le__, c_short) + self.assertIs(c_short.__ctype_be__.__ctype_le__, c_short) + else: + self.assertIs(c_short.__ctype_be__, c_short) + self.assertIs(c_short.__ctype_le__.__ctype_be__, c_short) + s = c_short.__ctype_be__(0x1234) + self.assertEqual(bin(struct.pack(">h", 0x1234)), "1234") + self.assertEqual(bin(s), "1234") + self.assertEqual(s.value, 0x1234) + + s = c_short.__ctype_le__(0x1234) + self.assertEqual(bin(struct.pack("h", 0x1234)), "1234") + self.assertEqual(bin(s), "1234") + self.assertEqual(s.value, 0x1234) + + s = c_ushort.__ctype_le__(0x1234) + self.assertEqual(bin(struct.pack("i", 0x12345678)), "12345678") + self.assertEqual(bin(s), "12345678") + self.assertEqual(s.value, 0x12345678) + + s = c_int.__ctype_le__(0x12345678) + self.assertEqual(bin(struct.pack("I", 0x12345678)), "12345678") + self.assertEqual(bin(s), "12345678") + self.assertEqual(s.value, 0x12345678) + + s = c_uint.__ctype_le__(0x12345678) + self.assertEqual(bin(struct.pack("q", 0x1234567890ABCDEF)), "1234567890ABCDEF") + self.assertEqual(bin(s), "1234567890ABCDEF") + self.assertEqual(s.value, 0x1234567890ABCDEF) + + s = c_longlong.__ctype_le__(0x1234567890ABCDEF) + self.assertEqual(bin(struct.pack("Q", 0x1234567890ABCDEF)), "1234567890ABCDEF") + self.assertEqual(bin(s), "1234567890ABCDEF") + self.assertEqual(s.value, 0x1234567890ABCDEF) + + s = c_ulonglong.__ctype_le__(0x1234567890ABCDEF) + self.assertEqual(bin(struct.pack("f", math.pi)), bin(s)) + + def test_endian_double(self): + if sys.byteorder == "little": + self.assertIs(c_double.__ctype_le__, c_double) + self.assertIs(c_double.__ctype_be__.__ctype_le__, c_double) + else: + self.assertIs(c_double.__ctype_be__, c_double) + self.assertIs(c_double.__ctype_le__.__ctype_be__, c_double) + s = c_double(math.pi) + self.assertEqual(s.value, math.pi) + self.assertEqual(bin(struct.pack("d", math.pi)), bin(s)) + s = c_double.__ctype_le__(math.pi) + self.assertEqual(s.value, math.pi) + self.assertEqual(bin(struct.pack("d", math.pi)), bin(s)) + + def test_endian_other(self): + self.assertIs(c_byte.__ctype_le__, c_byte) + self.assertIs(c_byte.__ctype_be__, c_byte) + + self.assertIs(c_ubyte.__ctype_le__, c_ubyte) + self.assertIs(c_ubyte.__ctype_be__, c_ubyte) + + self.assertIs(c_char.__ctype_le__, c_char) + self.assertIs(c_char.__ctype_be__, c_char) + + def test_struct_fields_1(self): + if sys.byteorder == "little": + base = BigEndianStructure + else: + base = LittleEndianStructure + + class T(base): + pass + _fields_ = [("a", c_ubyte), + ("b", c_byte), + ("c", c_short), + ("d", c_ushort), + ("e", c_int), + ("f", c_uint), + ("g", c_long), + ("h", c_ulong), + ("i", c_longlong), + ("k", c_ulonglong), + ("l", c_float), + ("m", c_double), + ("n", c_char), + + ("b1", c_byte, 3), + ("b2", c_byte, 3), + ("b3", c_byte, 2), + ("a", c_int * 3 * 3 * 3)] + T._fields_ = _fields_ + + # these fields do not support different byte order: + for typ in c_wchar, c_void_p, POINTER(c_int): + _fields_.append(("x", typ)) + class T(base): + pass + self.assertRaises(TypeError, setattr, T, "_fields_", [("x", typ)]) + + def test_struct_struct(self): + # nested structures with different byteorders + + # create nested structures with given byteorders and set memory to data + + for nested, data in ( + (BigEndianStructure, b'\0\0\0\1\0\0\0\2'), + (LittleEndianStructure, b'\1\0\0\0\2\0\0\0'), + ): + for parent in ( + BigEndianStructure, + LittleEndianStructure, + Structure, + ): + class NestedStructure(nested): + _fields_ = [("x", c_uint32), + ("y", c_uint32)] + + class TestStructure(parent): + _fields_ = [("point", NestedStructure)] + + self.assertEqual(len(data), sizeof(TestStructure)) + ptr = POINTER(TestStructure) + s = cast(data, ptr)[0] + del ctypes._pointer_type_cache[TestStructure] + self.assertEqual(s.point.x, 1) + self.assertEqual(s.point.y, 2) + + def test_struct_fields_2(self): + # standard packing in struct uses no alignment. + # So, we have to align using pad bytes. + # + # Unaligned accesses will crash Python (on those platforms that + # don't allow it, like sparc solaris). + if sys.byteorder == "little": + base = BigEndianStructure + fmt = ">bxhid" + else: + base = LittleEndianStructure + fmt = " float -> double + import math + self.check_type(c_float, math.e) + self.check_type(c_float, -math.e) + + def test_double(self): + self.check_type(c_double, 3.14) + self.check_type(c_double, -3.14) + + def test_longdouble(self): + self.check_type(c_longdouble, 3.14) + self.check_type(c_longdouble, -3.14) + + def test_char(self): + self.check_type(c_char, "x") + self.check_type(c_char, "a") + + # disabled: would now (correctly) raise a RuntimeWarning about + # a memory leak. A callback function cannot return a non-integral + # C type without causing a memory leak. + @unittest.skip('test disabled') + def test_char_p(self): + self.check_type(c_char_p, "abc") + self.check_type(c_char_p, "def") + + def test_pyobject(self): + o = () + from sys import getrefcount as grc + for o in (), [], object(): + initial = grc(o) + # This call leaks a reference to 'o'... + self.check_type(py_object, o) + before = grc(o) + # ...but this call doesn't leak any more. Where is the refcount? + self.check_type(py_object, o) + after = grc(o) + self.assertEqual((after, o), (before, o)) + + def test_unsupported_restype_1(self): + # Only "fundamental" result types are supported for callback + # functions, the type must have a non-NULL stgdict->setfunc. + # POINTER(c_double), for example, is not supported. + + prototype = self.functype.im_func(POINTER(c_double)) + # The type is checked when the prototype is called + self.assertRaises(TypeError, prototype, lambda: None) + + def test_unsupported_restype_2(self): + prototype = self.functype.im_func(object) + self.assertRaises(TypeError, prototype, lambda: None) + + def test_issue_7959(self): + proto = self.functype.im_func(None) + + class X(object): + def func(self): pass + def __init__(self): + self.v = proto(self.func) + + import gc + for i in range(32): + X() + gc.collect() + live = [x for x in gc.get_objects() + if isinstance(x, X)] + self.assertEqual(len(live), 0) + + def test_issue12483(self): + import gc + class Nasty: + def __del__(self): + gc.collect() + CFUNCTYPE(None)(lambda x=Nasty(): None) + + +@need_symbol('WINFUNCTYPE') +class StdcallCallbacks(Callbacks): + try: + functype = WINFUNCTYPE + except NameError: + pass + +################################################################ + +class SampleCallbacksTestCase(unittest.TestCase): + + def test_integrate(self): + # Derived from some then non-working code, posted by David Foster + dll = CDLL(_ctypes_test.__file__) + + # The function prototype called by 'integrate': double func(double); + CALLBACK = CFUNCTYPE(c_double, c_double) + + # The integrate function itself, exposed from the _ctypes_test dll + integrate = dll.integrate + integrate.argtypes = (c_double, c_double, CALLBACK, c_long) + integrate.restype = c_double + + def func(x): + return x**2 + + result = integrate(0.0, 1.0, CALLBACK(func), 10) + diff = abs(result - 1./3.) + + self.assertLess(diff, 0.01, "%s not less than 0.01" % diff) + + def test_issue_8959_a(self): + from ctypes.util import find_library + libc_path = find_library("c") + if not libc_path: + self.skipTest('could not find libc') + libc = CDLL(libc_path) + + @CFUNCTYPE(c_int, POINTER(c_int), POINTER(c_int)) + def cmp_func(a, b): + return a[0] - b[0] + + array = (c_int * 5)(5, 1, 99, 7, 33) + + libc.qsort(array, len(array), sizeof(c_int), cmp_func) + self.assertEqual(array[:], [1, 5, 7, 33, 99]) + + @need_symbol('WINFUNCTYPE') + def test_issue_8959_b(self): + from ctypes.wintypes import BOOL, HWND, LPARAM + global windowCount + windowCount = 0 + + @WINFUNCTYPE(BOOL, HWND, LPARAM) + def EnumWindowsCallbackFunc(hwnd, lParam): + global windowCount + windowCount += 1 + return True #Allow windows to keep enumerating + + windll.user32.EnumWindows(EnumWindowsCallbackFunc, 0) + + def test_callback_register_int(self): + # Issue #8275: buggy handling of callback args under Win64 + # NOTE: should be run on release builds as well + dll = CDLL(_ctypes_test.__file__) + CALLBACK = CFUNCTYPE(c_int, c_int, c_int, c_int, c_int, c_int) + # All this function does is call the callback with its args squared + func = dll._testfunc_cbk_reg_int + func.argtypes = (c_int, c_int, c_int, c_int, c_int, CALLBACK) + func.restype = c_int + + def callback(a, b, c, d, e): + return a + b + c + d + e + + result = func(2, 3, 4, 5, 6, CALLBACK(callback)) + self.assertEqual(result, callback(2*2, 3*3, 4*4, 5*5, 6*6)) + + def test_callback_register_double(self): + # Issue #8275: buggy handling of callback args under Win64 + # NOTE: should be run on release builds as well + dll = CDLL(_ctypes_test.__file__) + CALLBACK = CFUNCTYPE(c_double, c_double, c_double, c_double, + c_double, c_double) + # All this function does is call the callback with its args squared + func = dll._testfunc_cbk_reg_double + func.argtypes = (c_double, c_double, c_double, + c_double, c_double, CALLBACK) + func.restype = c_double + + def callback(a, b, c, d, e): + return a + b + c + d + e + + result = func(1.1, 2.2, 3.3, 4.4, 5.5, CALLBACK(callback)) + self.assertEqual(result, + callback(1.1*1.1, 2.2*2.2, 3.3*3.3, 4.4*4.4, 5.5*5.5)) + + +################################################################ + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_cast.py b/third_party/stdlib/ctypes/test/test_cast.py new file mode 100644 index 00000000..d24e0b59 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_cast.py @@ -0,0 +1,86 @@ +from ctypes import * +from ctypes.test import need_symbol +import unittest +import sys + +class Test(unittest.TestCase): + + def test_array2pointer(self): + array = (c_int * 3)(42, 17, 2) + + # casting an array to a pointer works. + ptr = cast(array, POINTER(c_int)) + self.assertEqual([ptr[i] for i in range(3)], [42, 17, 2]) + + if 2*sizeof(c_short) == sizeof(c_int): + ptr = cast(array, POINTER(c_short)) + if sys.byteorder == "little": + self.assertEqual([ptr[i] for i in range(6)], + [42, 0, 17, 0, 2, 0]) + else: + self.assertEqual([ptr[i] for i in range(6)], + [0, 42, 0, 17, 0, 2]) + + def test_address2pointer(self): + array = (c_int * 3)(42, 17, 2) + + address = addressof(array) + ptr = cast(c_void_p(address), POINTER(c_int)) + self.assertEqual([ptr[i] for i in range(3)], [42, 17, 2]) + + ptr = cast(address, POINTER(c_int)) + self.assertEqual([ptr[i] for i in range(3)], [42, 17, 2]) + + def test_p2a_objects(self): + array = (c_char_p * 5)() + self.assertEqual(array._objects, None) + array[0] = "foo bar" + self.assertEqual(array._objects, {'0': "foo bar"}) + + p = cast(array, POINTER(c_char_p)) + # array and p share a common _objects attribute + self.assertIs(p._objects, array._objects) + self.assertEqual(array._objects, {'0': "foo bar", id(array): array}) + p[0] = "spam spam" + self.assertEqual(p._objects, {'0': "spam spam", id(array): array}) + self.assertIs(array._objects, p._objects) + p[1] = "foo bar" + self.assertEqual(p._objects, {'1': 'foo bar', '0': "spam spam", id(array): array}) + self.assertIs(array._objects, p._objects) + + def test_other(self): + p = cast((c_int * 4)(1, 2, 3, 4), POINTER(c_int)) + self.assertEqual(p[:4], [1,2, 3, 4]) + self.assertEqual(p[:4:], [1, 2, 3, 4]) + self.assertEqual(p[3:-1:-1], [4, 3, 2, 1]) + self.assertEqual(p[:4:3], [1, 4]) + c_int() + self.assertEqual(p[:4], [1, 2, 3, 4]) + self.assertEqual(p[:4:], [1, 2, 3, 4]) + self.assertEqual(p[3:-1:-1], [4, 3, 2, 1]) + self.assertEqual(p[:4:3], [1, 4]) + p[2] = 96 + self.assertEqual(p[:4], [1, 2, 96, 4]) + self.assertEqual(p[:4:], [1, 2, 96, 4]) + self.assertEqual(p[3:-1:-1], [4, 96, 2, 1]) + self.assertEqual(p[:4:3], [1, 4]) + c_int() + self.assertEqual(p[:4], [1, 2, 96, 4]) + self.assertEqual(p[:4:], [1, 2, 96, 4]) + self.assertEqual(p[3:-1:-1], [4, 96, 2, 1]) + self.assertEqual(p[:4:3], [1, 4]) + + def test_char_p(self): + # This didn't work: bad argument to internal function + s = c_char_p("hiho") + self.assertEqual(cast(cast(s, c_void_p), c_char_p).value, + "hiho") + + @need_symbol('c_wchar_p') + def test_wchar_p(self): + s = c_wchar_p("hiho") + self.assertEqual(cast(cast(s, c_void_p), c_wchar_p).value, + "hiho") + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_cfuncs.py b/third_party/stdlib/ctypes/test/test_cfuncs.py new file mode 100644 index 00000000..765408c6 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_cfuncs.py @@ -0,0 +1,212 @@ +# A lot of failures in these tests on Mac OS X. +# Byte order related? + +import unittest +from ctypes import * +from ctypes.test import need_symbol + +import _ctypes_test + +class CFunctions(unittest.TestCase): + _dll = CDLL(_ctypes_test.__file__) + + def S(self): + return c_longlong.in_dll(self._dll, "last_tf_arg_s").value + def U(self): + return c_ulonglong.in_dll(self._dll, "last_tf_arg_u").value + + def test_byte(self): + self._dll.tf_b.restype = c_byte + self._dll.tf_b.argtypes = (c_byte,) + self.assertEqual(self._dll.tf_b(-126), -42) + self.assertEqual(self.S(), -126) + + def test_byte_plus(self): + self._dll.tf_bb.restype = c_byte + self._dll.tf_bb.argtypes = (c_byte, c_byte) + self.assertEqual(self._dll.tf_bb(0, -126), -42) + self.assertEqual(self.S(), -126) + + def test_ubyte(self): + self._dll.tf_B.restype = c_ubyte + self._dll.tf_B.argtypes = (c_ubyte,) + self.assertEqual(self._dll.tf_B(255), 85) + self.assertEqual(self.U(), 255) + + def test_ubyte_plus(self): + self._dll.tf_bB.restype = c_ubyte + self._dll.tf_bB.argtypes = (c_byte, c_ubyte) + self.assertEqual(self._dll.tf_bB(0, 255), 85) + self.assertEqual(self.U(), 255) + + def test_short(self): + self._dll.tf_h.restype = c_short + self._dll.tf_h.argtypes = (c_short,) + self.assertEqual(self._dll.tf_h(-32766), -10922) + self.assertEqual(self.S(), -32766) + + def test_short_plus(self): + self._dll.tf_bh.restype = c_short + self._dll.tf_bh.argtypes = (c_byte, c_short) + self.assertEqual(self._dll.tf_bh(0, -32766), -10922) + self.assertEqual(self.S(), -32766) + + def test_ushort(self): + self._dll.tf_H.restype = c_ushort + self._dll.tf_H.argtypes = (c_ushort,) + self.assertEqual(self._dll.tf_H(65535), 21845) + self.assertEqual(self.U(), 65535) + + def test_ushort_plus(self): + self._dll.tf_bH.restype = c_ushort + self._dll.tf_bH.argtypes = (c_byte, c_ushort) + self.assertEqual(self._dll.tf_bH(0, 65535), 21845) + self.assertEqual(self.U(), 65535) + + def test_int(self): + self._dll.tf_i.restype = c_int + self._dll.tf_i.argtypes = (c_int,) + self.assertEqual(self._dll.tf_i(-2147483646), -715827882) + self.assertEqual(self.S(), -2147483646) + + def test_int_plus(self): + self._dll.tf_bi.restype = c_int + self._dll.tf_bi.argtypes = (c_byte, c_int) + self.assertEqual(self._dll.tf_bi(0, -2147483646), -715827882) + self.assertEqual(self.S(), -2147483646) + + def test_uint(self): + self._dll.tf_I.restype = c_uint + self._dll.tf_I.argtypes = (c_uint,) + self.assertEqual(self._dll.tf_I(4294967295), 1431655765) + self.assertEqual(self.U(), 4294967295) + + def test_uint_plus(self): + self._dll.tf_bI.restype = c_uint + self._dll.tf_bI.argtypes = (c_byte, c_uint) + self.assertEqual(self._dll.tf_bI(0, 4294967295), 1431655765) + self.assertEqual(self.U(), 4294967295) + + def test_long(self): + self._dll.tf_l.restype = c_long + self._dll.tf_l.argtypes = (c_long,) + self.assertEqual(self._dll.tf_l(-2147483646), -715827882) + self.assertEqual(self.S(), -2147483646) + + def test_long_plus(self): + self._dll.tf_bl.restype = c_long + self._dll.tf_bl.argtypes = (c_byte, c_long) + self.assertEqual(self._dll.tf_bl(0, -2147483646), -715827882) + self.assertEqual(self.S(), -2147483646) + + def test_ulong(self): + self._dll.tf_L.restype = c_ulong + self._dll.tf_L.argtypes = (c_ulong,) + self.assertEqual(self._dll.tf_L(4294967295), 1431655765) + self.assertEqual(self.U(), 4294967295) + + def test_ulong_plus(self): + self._dll.tf_bL.restype = c_ulong + self._dll.tf_bL.argtypes = (c_char, c_ulong) + self.assertEqual(self._dll.tf_bL(' ', 4294967295), 1431655765) + self.assertEqual(self.U(), 4294967295) + + def test_longlong(self): + self._dll.tf_q.restype = c_longlong + self._dll.tf_q.argtypes = (c_longlong, ) + self.assertEqual(self._dll.tf_q(-9223372036854775806), -3074457345618258602) + self.assertEqual(self.S(), -9223372036854775806) + + def test_longlong_plus(self): + self._dll.tf_bq.restype = c_longlong + self._dll.tf_bq.argtypes = (c_byte, c_longlong) + self.assertEqual(self._dll.tf_bq(0, -9223372036854775806), -3074457345618258602) + self.assertEqual(self.S(), -9223372036854775806) + + def test_ulonglong(self): + self._dll.tf_Q.restype = c_ulonglong + self._dll.tf_Q.argtypes = (c_ulonglong, ) + self.assertEqual(self._dll.tf_Q(18446744073709551615), 6148914691236517205) + self.assertEqual(self.U(), 18446744073709551615) + + def test_ulonglong_plus(self): + self._dll.tf_bQ.restype = c_ulonglong + self._dll.tf_bQ.argtypes = (c_byte, c_ulonglong) + self.assertEqual(self._dll.tf_bQ(0, 18446744073709551615), 6148914691236517205) + self.assertEqual(self.U(), 18446744073709551615) + + def test_float(self): + self._dll.tf_f.restype = c_float + self._dll.tf_f.argtypes = (c_float,) + self.assertEqual(self._dll.tf_f(-42.), -14.) + self.assertEqual(self.S(), -42) + + def test_float_plus(self): + self._dll.tf_bf.restype = c_float + self._dll.tf_bf.argtypes = (c_byte, c_float) + self.assertEqual(self._dll.tf_bf(0, -42.), -14.) + self.assertEqual(self.S(), -42) + + def test_double(self): + self._dll.tf_d.restype = c_double + self._dll.tf_d.argtypes = (c_double,) + self.assertEqual(self._dll.tf_d(42.), 14.) + self.assertEqual(self.S(), 42) + + def test_double_plus(self): + self._dll.tf_bd.restype = c_double + self._dll.tf_bd.argtypes = (c_byte, c_double) + self.assertEqual(self._dll.tf_bd(0, 42.), 14.) + self.assertEqual(self.S(), 42) + + def test_longdouble(self): + self._dll.tf_D.restype = c_longdouble + self._dll.tf_D.argtypes = (c_longdouble,) + self.assertEqual(self._dll.tf_D(42.), 14.) + self.assertEqual(self.S(), 42) + + def test_longdouble_plus(self): + self._dll.tf_bD.restype = c_longdouble + self._dll.tf_bD.argtypes = (c_byte, c_longdouble) + self.assertEqual(self._dll.tf_bD(0, 42.), 14.) + self.assertEqual(self.S(), 42) + + def test_callwithresult(self): + def process_result(result): + return result * 2 + self._dll.tf_i.restype = process_result + self._dll.tf_i.argtypes = (c_int,) + self.assertEqual(self._dll.tf_i(42), 28) + self.assertEqual(self.S(), 42) + self.assertEqual(self._dll.tf_i(-42), -28) + self.assertEqual(self.S(), -42) + + def test_void(self): + self._dll.tv_i.restype = None + self._dll.tv_i.argtypes = (c_int,) + self.assertEqual(self._dll.tv_i(42), None) + self.assertEqual(self.S(), 42) + self.assertEqual(self._dll.tv_i(-42), None) + self.assertEqual(self.S(), -42) + +# The following repeats the above tests with stdcall functions (where +# they are available) +try: + WinDLL +except NameError: + def stdcall_dll(*_): pass +else: + class stdcall_dll(WinDLL): + def __getattr__(self, name): + if name[:2] == '__' and name[-2:] == '__': + raise AttributeError(name) + func = self._FuncPtr(("s_" + name, self)) + setattr(self, name, func) + return func + +@need_symbol('WinDLL') +class stdcallCFunctions(CFunctions): + _dll = stdcall_dll(_ctypes_test.__file__) + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_checkretval.py b/third_party/stdlib/ctypes/test/test_checkretval.py new file mode 100644 index 00000000..a0dc5344 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_checkretval.py @@ -0,0 +1,36 @@ +import unittest + +from ctypes import * +from ctypes.test import need_symbol + +class CHECKED(c_int): + def _check_retval_(value): + # Receives a CHECKED instance. + return str(value.value) + _check_retval_ = staticmethod(_check_retval_) + +class Test(unittest.TestCase): + + def test_checkretval(self): + + import _ctypes_test + dll = CDLL(_ctypes_test.__file__) + self.assertEqual(42, dll._testfunc_p_p(42)) + + dll._testfunc_p_p.restype = CHECKED + self.assertEqual("42", dll._testfunc_p_p(42)) + + dll._testfunc_p_p.restype = None + self.assertEqual(None, dll._testfunc_p_p(42)) + + del dll._testfunc_p_p.restype + self.assertEqual(42, dll._testfunc_p_p(42)) + + @need_symbol('oledll') + def test_oledll(self): + self.assertRaises(WindowsError, + oledll.oleaut32.CreateTypeLib2, + 0, None, None) + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_delattr.py b/third_party/stdlib/ctypes/test/test_delattr.py new file mode 100644 index 00000000..0f4d5869 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_delattr.py @@ -0,0 +1,21 @@ +import unittest +from ctypes import * + +class X(Structure): + _fields_ = [("foo", c_int)] + +class TestCase(unittest.TestCase): + def test_simple(self): + self.assertRaises(TypeError, + delattr, c_int(42), "value") + + def test_chararray(self): + self.assertRaises(TypeError, + delattr, (c_char * 5)(), "value") + + def test_struct(self): + self.assertRaises(TypeError, + delattr, X(), "foo") + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_errno.py b/third_party/stdlib/ctypes/test/test_errno.py new file mode 100644 index 00000000..c7a5bf0d --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_errno.py @@ -0,0 +1,80 @@ +import unittest, os, errno +from ctypes import * +from ctypes.util import find_library +from test import test_support +try: + import threading +except ImportError: + threading = None + +class Test(unittest.TestCase): + def test_open(self): + libc_name = find_library("c") + if libc_name is None: + raise unittest.SkipTest("Unable to find C library") + libc = CDLL(libc_name, use_errno=True) + if os.name == "nt": + libc_open = libc._open + else: + libc_open = libc.open + + libc_open.argtypes = c_char_p, c_int + + self.assertEqual(libc_open("", 0), -1) + self.assertEqual(get_errno(), errno.ENOENT) + + self.assertEqual(set_errno(32), errno.ENOENT) + self.assertEqual(get_errno(), 32) + + if threading: + def _worker(): + set_errno(0) + + libc = CDLL(libc_name, use_errno=False) + if os.name == "nt": + libc_open = libc._open + else: + libc_open = libc.open + libc_open.argtypes = c_char_p, c_int + self.assertEqual(libc_open("", 0), -1) + self.assertEqual(get_errno(), 0) + + t = threading.Thread(target=_worker) + t.start() + t.join() + + self.assertEqual(get_errno(), 32) + set_errno(0) + + @unittest.skipUnless(os.name == "nt", 'Test specific to Windows') + def test_GetLastError(self): + dll = WinDLL("kernel32", use_last_error=True) + GetModuleHandle = dll.GetModuleHandleA + GetModuleHandle.argtypes = [c_wchar_p] + + self.assertEqual(0, GetModuleHandle("foo")) + self.assertEqual(get_last_error(), 126) + + self.assertEqual(set_last_error(32), 126) + self.assertEqual(get_last_error(), 32) + + def _worker(): + set_last_error(0) + + dll = WinDLL("kernel32", use_last_error=False) + GetModuleHandle = dll.GetModuleHandleW + GetModuleHandle.argtypes = [c_wchar_p] + GetModuleHandle("bar") + + self.assertEqual(get_last_error(), 0) + + t = threading.Thread(target=_worker) + t.start() + t.join() + + self.assertEqual(get_last_error(), 32) + + set_last_error(0) + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_find.py b/third_party/stdlib/ctypes/test/test_find.py new file mode 100644 index 00000000..e7a0a931 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_find.py @@ -0,0 +1,92 @@ +import unittest +import os +import sys +from ctypes import * +from ctypes.util import find_library +from ctypes.test import is_resource_enabled + +if sys.platform == "win32": + lib_gl = find_library("OpenGL32") + lib_glu = find_library("Glu32") + lib_gle = None +elif sys.platform == "darwin": + lib_gl = lib_glu = find_library("OpenGL") + lib_gle = None +else: + lib_gl = find_library("GL") + lib_glu = find_library("GLU") + lib_gle = find_library("gle") + +## print, for debugging +if is_resource_enabled("printing"): + if lib_gl or lib_glu or lib_gle: + print "OpenGL libraries:" + for item in (("GL", lib_gl), + ("GLU", lib_glu), + ("gle", lib_gle)): + print "\t", item + + +# On some systems, loading the OpenGL libraries needs the RTLD_GLOBAL mode. +class Test_OpenGL_libs(unittest.TestCase): + def setUp(self): + self.gl = self.glu = self.gle = None + if lib_gl: + try: + self.gl = CDLL(lib_gl, mode=RTLD_GLOBAL) + except OSError: + pass + if lib_glu: + try: + self.glu = CDLL(lib_glu, RTLD_GLOBAL) + except OSError: + pass + if lib_gle: + try: + self.gle = CDLL(lib_gle) + except OSError: + pass + + def tearDown(self): + self.gl = self.glu = self.gle = None + + @unittest.skipUnless(lib_gl, 'lib_gl not available') + def test_gl(self): + if self.gl: + self.gl.glClearIndex + + @unittest.skipUnless(lib_glu, 'lib_glu not available') + def test_glu(self): + if self.glu: + self.glu.gluBeginCurve + + @unittest.skipUnless(lib_gle, 'lib_gle not available') + def test_gle(self): + if self.gle: + self.gle.gleGetJoinStyle + +# On platforms where the default shared library suffix is '.so', +# at least some libraries can be loaded as attributes of the cdll +# object, since ctypes now tries loading the lib again +# with '.so' appended of the first try fails. +# +# Won't work for libc, unfortunately. OTOH, it isn't +# needed for libc since this is already mapped into the current +# process (?) +# +# On MAC OSX, it won't work either, because dlopen() needs a full path, +# and the default suffix is either none or '.dylib'. +@unittest.skip('test disabled') +@unittest.skipUnless(os.name=="posix" and sys.platform != "darwin", + 'test not suitable for this platform') +class LoadLibs(unittest.TestCase): + def test_libm(self): + import math + libm = cdll.libm + sqrt = libm.sqrt + sqrt.argtypes = (c_double,) + sqrt.restype = c_double + self.assertEqual(sqrt(2), math.sqrt(2)) + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_frombuffer.py b/third_party/stdlib/ctypes/test/test_frombuffer.py new file mode 100644 index 00000000..9f4bb28d --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_frombuffer.py @@ -0,0 +1,81 @@ +from ctypes import * +import array +import gc +import unittest + +class X(Structure): + _fields_ = [("c_int", c_int)] + init_called = False + def __init__(self): + self._init_called = True + +class Test(unittest.TestCase): + def test_fom_buffer(self): + a = array.array("i", range(16)) + x = (c_int * 16).from_buffer(a) + + y = X.from_buffer(a) + self.assertEqual(y.c_int, a[0]) + self.assertFalse(y.init_called) + + self.assertEqual(x[:], a.tolist()) + + a[0], a[-1] = 200, -200 + self.assertEqual(x[:], a.tolist()) + + self.assertIn(a, x._objects.values()) + + self.assertRaises(ValueError, + c_int.from_buffer, a, -1) + + expected = x[:] + del a; gc.collect(); gc.collect(); gc.collect() + self.assertEqual(x[:], expected) + + self.assertRaises(TypeError, + (c_char * 16).from_buffer, "a" * 16) + + def test_fom_buffer_with_offset(self): + a = array.array("i", range(16)) + x = (c_int * 15).from_buffer(a, sizeof(c_int)) + + self.assertEqual(x[:], a.tolist()[1:]) + self.assertRaises(ValueError, lambda: (c_int * 16).from_buffer(a, sizeof(c_int))) + self.assertRaises(ValueError, lambda: (c_int * 1).from_buffer(a, 16 * sizeof(c_int))) + + def test_from_buffer_copy(self): + a = array.array("i", range(16)) + x = (c_int * 16).from_buffer_copy(a) + + y = X.from_buffer_copy(a) + self.assertEqual(y.c_int, a[0]) + self.assertFalse(y.init_called) + + self.assertEqual(x[:], range(16)) + + a[0], a[-1] = 200, -200 + self.assertEqual(x[:], range(16)) + + self.assertEqual(x._objects, None) + + self.assertRaises(ValueError, + c_int.from_buffer, a, -1) + + del a; gc.collect(); gc.collect(); gc.collect() + self.assertEqual(x[:], range(16)) + + x = (c_char * 16).from_buffer_copy("a" * 16) + self.assertEqual(x[:], "a" * 16) + + def test_fom_buffer_copy_with_offset(self): + a = array.array("i", range(16)) + x = (c_int * 15).from_buffer_copy(a, sizeof(c_int)) + + self.assertEqual(x[:], a.tolist()[1:]) + self.assertRaises(ValueError, + (c_int * 16).from_buffer_copy, a, sizeof(c_int)) + self.assertRaises(ValueError, + (c_int * 1).from_buffer_copy, a, 16 * sizeof(c_int)) + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_funcptr.py b/third_party/stdlib/ctypes/test/test_funcptr.py new file mode 100644 index 00000000..58cbb47a --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_funcptr.py @@ -0,0 +1,127 @@ +import os, unittest +from ctypes import * + +try: + WINFUNCTYPE +except NameError: + # fake to enable this test on Linux + WINFUNCTYPE = CFUNCTYPE + +import _ctypes_test +lib = CDLL(_ctypes_test.__file__) + +class CFuncPtrTestCase(unittest.TestCase): + def test_basic(self): + X = WINFUNCTYPE(c_int, c_int, c_int) + + def func(*args): + return len(args) + + x = X(func) + self.assertEqual(x.restype, c_int) + self.assertEqual(x.argtypes, (c_int, c_int)) + self.assertEqual(sizeof(x), sizeof(c_voidp)) + self.assertEqual(sizeof(X), sizeof(c_voidp)) + + def test_first(self): + StdCallback = WINFUNCTYPE(c_int, c_int, c_int) + CdeclCallback = CFUNCTYPE(c_int, c_int, c_int) + + def func(a, b): + return a + b + + s = StdCallback(func) + c = CdeclCallback(func) + + self.assertEqual(s(1, 2), 3) + self.assertEqual(c(1, 2), 3) + # The following no longer raises a TypeError - it is now + # possible, as in C, to call cdecl functions with more parameters. + #self.assertRaises(TypeError, c, 1, 2, 3) + self.assertEqual(c(1, 2, 3, 4, 5, 6), 3) + if not WINFUNCTYPE is CFUNCTYPE and os.name != "ce": + self.assertRaises(TypeError, s, 1, 2, 3) + + def test_structures(self): + WNDPROC = WINFUNCTYPE(c_long, c_int, c_int, c_int, c_int) + + def wndproc(hwnd, msg, wParam, lParam): + return hwnd + msg + wParam + lParam + + HINSTANCE = c_int + HICON = c_int + HCURSOR = c_int + LPCTSTR = c_char_p + + class WNDCLASS(Structure): + _fields_ = [("style", c_uint), + ("lpfnWndProc", WNDPROC), + ("cbClsExtra", c_int), + ("cbWndExtra", c_int), + ("hInstance", HINSTANCE), + ("hIcon", HICON), + ("hCursor", HCURSOR), + ("lpszMenuName", LPCTSTR), + ("lpszClassName", LPCTSTR)] + + wndclass = WNDCLASS() + wndclass.lpfnWndProc = WNDPROC(wndproc) + + WNDPROC_2 = WINFUNCTYPE(c_long, c_int, c_int, c_int, c_int) + + # This is no longer true, now that WINFUNCTYPE caches created types internally. + ## # CFuncPtr subclasses are compared by identity, so this raises a TypeError: + ## self.assertRaises(TypeError, setattr, wndclass, + ## "lpfnWndProc", WNDPROC_2(wndproc)) + # instead: + + self.assertIs(WNDPROC, WNDPROC_2) + # 'wndclass.lpfnWndProc' leaks 94 references. Why? + self.assertEqual(wndclass.lpfnWndProc(1, 2, 3, 4), 10) + + + f = wndclass.lpfnWndProc + + del wndclass + del wndproc + + self.assertEqual(f(10, 11, 12, 13), 46) + + def test_dllfunctions(self): + + def NoNullHandle(value): + if not value: + raise WinError() + return value + + strchr = lib.my_strchr + strchr.restype = c_char_p + strchr.argtypes = (c_char_p, c_char) + self.assertEqual(strchr("abcdefghi", "b"), "bcdefghi") + self.assertEqual(strchr("abcdefghi", "x"), None) + + + strtok = lib.my_strtok + strtok.restype = c_char_p + # Neither of this does work: strtok changes the buffer it is passed +## strtok.argtypes = (c_char_p, c_char_p) +## strtok.argtypes = (c_string, c_char_p) + + def c_string(init): + size = len(init) + 1 + return (c_char*size)(*init) + + s = "a\nb\nc" + b = c_string(s) + +## b = (c_char * (len(s)+1))() +## b.value = s + +## b = c_string(s) + self.assertEqual(strtok(b, "\n"), "a") + self.assertEqual(strtok(None, "\n"), "b") + self.assertEqual(strtok(None, "\n"), "c") + self.assertEqual(strtok(None, "\n"), None) + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_functions.py b/third_party/stdlib/ctypes/test/test_functions.py new file mode 100644 index 00000000..a3744157 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_functions.py @@ -0,0 +1,402 @@ +""" +Here is probably the place to write the docs, since the test-cases +show how the type behave. + +Later... +""" + +from ctypes import * +from ctypes.test import need_symbol +import sys, unittest + +try: + WINFUNCTYPE +except NameError: + # fake to enable this test on Linux + WINFUNCTYPE = CFUNCTYPE + +import _ctypes_test +dll = CDLL(_ctypes_test.__file__) +if sys.platform == "win32": + windll = WinDLL(_ctypes_test.__file__) + +class POINT(Structure): + _fields_ = [("x", c_int), ("y", c_int)] +class RECT(Structure): + _fields_ = [("left", c_int), ("top", c_int), + ("right", c_int), ("bottom", c_int)] +class FunctionTestCase(unittest.TestCase): + + def test_mro(self): + # in Python 2.3, this raises TypeError: MRO conflict among bases classes, + # in Python 2.2 it works. + # + # But in early versions of _ctypes.c, the result of tp_new + # wasn't checked, and it even crashed Python. + # Found by Greg Chapman. + + try: + class X(object, Array): + _length_ = 5 + _type_ = "i" + except TypeError: + pass + + + from _ctypes import _Pointer + try: + class X(object, _Pointer): + pass + except TypeError: + pass + + from _ctypes import _SimpleCData + try: + class X(object, _SimpleCData): + _type_ = "i" + except TypeError: + pass + + try: + class X(object, Structure): + _fields_ = [] + except TypeError: + pass + + + @need_symbol('c_wchar') + def test_wchar_parm(self): + f = dll._testfunc_i_bhilfd + f.argtypes = [c_byte, c_wchar, c_int, c_long, c_float, c_double] + result = f(1, u"x", 3, 4, 5.0, 6.0) + self.assertEqual(result, 139) + self.assertEqual(type(result), int) + + @need_symbol('c_wchar') + def test_wchar_result(self): + f = dll._testfunc_i_bhilfd + f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double] + f.restype = c_wchar + result = f(0, 0, 0, 0, 0, 0) + self.assertEqual(result, u'\x00') + + def test_voidresult(self): + f = dll._testfunc_v + f.restype = None + f.argtypes = [c_int, c_int, POINTER(c_int)] + result = c_int() + self.assertEqual(None, f(1, 2, byref(result))) + self.assertEqual(result.value, 3) + + def test_intresult(self): + f = dll._testfunc_i_bhilfd + f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double] + f.restype = c_int + result = f(1, 2, 3, 4, 5.0, 6.0) + self.assertEqual(result, 21) + self.assertEqual(type(result), int) + + result = f(-1, -2, -3, -4, -5.0, -6.0) + self.assertEqual(result, -21) + self.assertEqual(type(result), int) + + # If we declare the function to return a short, + # is the high part split off? + f.restype = c_short + result = f(1, 2, 3, 4, 5.0, 6.0) + self.assertEqual(result, 21) + self.assertEqual(type(result), int) + + result = f(1, 2, 3, 0x10004, 5.0, 6.0) + self.assertEqual(result, 21) + self.assertEqual(type(result), int) + + # You cannot assign character format codes as restype any longer + self.assertRaises(TypeError, setattr, f, "restype", "i") + + def test_floatresult(self): + f = dll._testfunc_f_bhilfd + f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double] + f.restype = c_float + result = f(1, 2, 3, 4, 5.0, 6.0) + self.assertEqual(result, 21) + self.assertEqual(type(result), float) + + result = f(-1, -2, -3, -4, -5.0, -6.0) + self.assertEqual(result, -21) + self.assertEqual(type(result), float) + + def test_doubleresult(self): + f = dll._testfunc_d_bhilfd + f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double] + f.restype = c_double + result = f(1, 2, 3, 4, 5.0, 6.0) + self.assertEqual(result, 21) + self.assertEqual(type(result), float) + + result = f(-1, -2, -3, -4, -5.0, -6.0) + self.assertEqual(result, -21) + self.assertEqual(type(result), float) + + def test_longdoubleresult(self): + f = dll._testfunc_D_bhilfD + f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_longdouble] + f.restype = c_longdouble + result = f(1, 2, 3, 4, 5.0, 6.0) + self.assertEqual(result, 21) + self.assertEqual(type(result), float) + + result = f(-1, -2, -3, -4, -5.0, -6.0) + self.assertEqual(result, -21) + self.assertEqual(type(result), float) + + @need_symbol('c_longlong') + def test_longlongresult(self): + f = dll._testfunc_q_bhilfd + f.restype = c_longlong + f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double] + result = f(1, 2, 3, 4, 5.0, 6.0) + self.assertEqual(result, 21) + + f = dll._testfunc_q_bhilfdq + f.restype = c_longlong + f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double, c_longlong] + result = f(1, 2, 3, 4, 5.0, 6.0, 21) + self.assertEqual(result, 42) + + def test_stringresult(self): + f = dll._testfunc_p_p + f.argtypes = None + f.restype = c_char_p + result = f("123") + self.assertEqual(result, "123") + + result = f(None) + self.assertEqual(result, None) + + def test_pointers(self): + f = dll._testfunc_p_p + f.restype = POINTER(c_int) + f.argtypes = [POINTER(c_int)] + + # This only works if the value c_int(42) passed to the + # function is still alive while the pointer (the result) is + # used. + + v = c_int(42) + + self.assertEqual(pointer(v).contents.value, 42) + result = f(pointer(v)) + self.assertEqual(type(result), POINTER(c_int)) + self.assertEqual(result.contents.value, 42) + + # This on works... + result = f(pointer(v)) + self.assertEqual(result.contents.value, v.value) + + p = pointer(c_int(99)) + result = f(p) + self.assertEqual(result.contents.value, 99) + + arg = byref(v) + result = f(arg) + self.assertNotEqual(result.contents, v.value) + + self.assertRaises(ArgumentError, f, byref(c_short(22))) + + # It is dangerous, however, because you don't control the lifetime + # of the pointer: + result = f(byref(c_int(99))) + self.assertNotEqual(result.contents, 99) + + def test_errors(self): + f = dll._testfunc_p_p + f.restype = c_int + + class X(Structure): + _fields_ = [("y", c_int)] + + self.assertRaises(TypeError, f, X()) #cannot convert parameter + + ################################################################ + def test_shorts(self): + f = dll._testfunc_callback_i_if + + args = [] + expected = [262144, 131072, 65536, 32768, 16384, 8192, 4096, 2048, + 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1] + + def callback(v): + args.append(v) + return v + + CallBack = CFUNCTYPE(c_int, c_int) + + cb = CallBack(callback) + f(2**18, cb) + self.assertEqual(args, expected) + + ################################################################ + + + def test_callbacks(self): + f = dll._testfunc_callback_i_if + f.restype = c_int + f.argtypes = None + + MyCallback = CFUNCTYPE(c_int, c_int) + + def callback(value): + #print "called back with", value + return value + + cb = MyCallback(callback) + result = f(-10, cb) + self.assertEqual(result, -18) + + # test with prototype + f.argtypes = [c_int, MyCallback] + cb = MyCallback(callback) + result = f(-10, cb) + self.assertEqual(result, -18) + + AnotherCallback = WINFUNCTYPE(c_int, c_int, c_int, c_int, c_int) + + # check that the prototype works: we call f with wrong + # argument types + cb = AnotherCallback(callback) + self.assertRaises(ArgumentError, f, -10, cb) + + + def test_callbacks_2(self): + # Can also use simple datatypes as argument type specifiers + # for the callback function. + # In this case the call receives an instance of that type + f = dll._testfunc_callback_i_if + f.restype = c_int + + MyCallback = CFUNCTYPE(c_int, c_int) + + f.argtypes = [c_int, MyCallback] + + def callback(value): + #print "called back with", value + self.assertEqual(type(value), int) + return value + + cb = MyCallback(callback) + result = f(-10, cb) + self.assertEqual(result, -18) + + @need_symbol('c_longlong') + def test_longlong_callbacks(self): + + f = dll._testfunc_callback_q_qf + f.restype = c_longlong + + MyCallback = CFUNCTYPE(c_longlong, c_longlong) + + f.argtypes = [c_longlong, MyCallback] + + def callback(value): + self.assertIsInstance(value, (int, long)) + return value & 0x7FFFFFFF + + cb = MyCallback(callback) + + self.assertEqual(13577625587, f(1000000000000, cb)) + + def test_errors(self): + self.assertRaises(AttributeError, getattr, dll, "_xxx_yyy") + self.assertRaises(ValueError, c_int.in_dll, dll, "_xxx_yyy") + + def test_byval(self): + + # without prototype + ptin = POINT(1, 2) + ptout = POINT() + # EXPORT int _testfunc_byval(point in, point *pout) + result = dll._testfunc_byval(ptin, byref(ptout)) + got = result, ptout.x, ptout.y + expected = 3, 1, 2 + self.assertEqual(got, expected) + + # with prototype + ptin = POINT(101, 102) + ptout = POINT() + dll._testfunc_byval.argtypes = (POINT, POINTER(POINT)) + dll._testfunc_byval.restype = c_int + result = dll._testfunc_byval(ptin, byref(ptout)) + got = result, ptout.x, ptout.y + expected = 203, 101, 102 + self.assertEqual(got, expected) + + def test_struct_return_2H(self): + class S2H(Structure): + _fields_ = [("x", c_short), + ("y", c_short)] + dll.ret_2h_func.restype = S2H + dll.ret_2h_func.argtypes = [S2H] + inp = S2H(99, 88) + s2h = dll.ret_2h_func(inp) + self.assertEqual((s2h.x, s2h.y), (99*2, 88*3)) + + @unittest.skipUnless(sys.platform == "win32", 'Windows-specific test') + def test_struct_return_2H_stdcall(self): + class S2H(Structure): + _fields_ = [("x", c_short), + ("y", c_short)] + + windll.s_ret_2h_func.restype = S2H + windll.s_ret_2h_func.argtypes = [S2H] + s2h = windll.s_ret_2h_func(S2H(99, 88)) + self.assertEqual((s2h.x, s2h.y), (99*2, 88*3)) + + def test_struct_return_8H(self): + class S8I(Structure): + _fields_ = [("a", c_int), + ("b", c_int), + ("c", c_int), + ("d", c_int), + ("e", c_int), + ("f", c_int), + ("g", c_int), + ("h", c_int)] + dll.ret_8i_func.restype = S8I + dll.ret_8i_func.argtypes = [S8I] + inp = S8I(9, 8, 7, 6, 5, 4, 3, 2) + s8i = dll.ret_8i_func(inp) + self.assertEqual((s8i.a, s8i.b, s8i.c, s8i.d, s8i.e, s8i.f, s8i.g, s8i.h), + (9*2, 8*3, 7*4, 6*5, 5*6, 4*7, 3*8, 2*9)) + + @unittest.skipUnless(sys.platform == "win32", 'Windows-specific test') + def test_struct_return_8H_stdcall(self): + class S8I(Structure): + _fields_ = [("a", c_int), + ("b", c_int), + ("c", c_int), + ("d", c_int), + ("e", c_int), + ("f", c_int), + ("g", c_int), + ("h", c_int)] + windll.s_ret_8i_func.restype = S8I + windll.s_ret_8i_func.argtypes = [S8I] + inp = S8I(9, 8, 7, 6, 5, 4, 3, 2) + s8i = windll.s_ret_8i_func(inp) + self.assertEqual( + (s8i.a, s8i.b, s8i.c, s8i.d, s8i.e, s8i.f, s8i.g, s8i.h), + (9*2, 8*3, 7*4, 6*5, 5*6, 4*7, 3*8, 2*9)) + + def test_sf1651235(self): + # see http://www.python.org/sf/1651235 + + proto = CFUNCTYPE(c_int, RECT, POINT) + def callback(*args): + return 0 + + callback = proto(callback) + self.assertRaises(ArgumentError, lambda: callback((1, 2, 3, 4), POINT())) + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_incomplete.py b/third_party/stdlib/ctypes/test/test_incomplete.py new file mode 100644 index 00000000..1e03e9fc --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_incomplete.py @@ -0,0 +1,42 @@ +import unittest +from ctypes import * + +################################################################ +# +# The incomplete pointer example from the tutorial +# + +class MyTestCase(unittest.TestCase): + + def test_incomplete_example(self): + lpcell = POINTER("cell") + class cell(Structure): + _fields_ = [("name", c_char_p), + ("next", lpcell)] + + SetPointerType(lpcell, cell) + + c1 = cell() + c1.name = "foo" + c2 = cell() + c2.name = "bar" + + c1.next = pointer(c2) + c2.next = pointer(c1) + + p = c1 + + result = [] + for i in range(8): + result.append(p.name) + p = p.next[0] + self.assertEqual(result, ["foo", "bar"] * 4) + + # to not leak references, we must clean _pointer_type_cache + from ctypes import _pointer_type_cache + del _pointer_type_cache[cell] + +################################################################ + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_init.py b/third_party/stdlib/ctypes/test/test_init.py new file mode 100644 index 00000000..82bd1f99 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_init.py @@ -0,0 +1,40 @@ +from ctypes import * +import unittest + +class X(Structure): + _fields_ = [("a", c_int), + ("b", c_int)] + new_was_called = False + + def __new__(cls): + result = super(X, cls).__new__(cls) + result.new_was_called = True + return result + + def __init__(self): + self.a = 9 + self.b = 12 + +class Y(Structure): + _fields_ = [("x", X)] + + +class InitTest(unittest.TestCase): + def test_get(self): + # make sure the only accessing a nested structure + # doesn't call the structure's __new__ and __init__ + y = Y() + self.assertEqual((y.x.a, y.x.b), (0, 0)) + self.assertEqual(y.x.new_was_called, False) + + # But explicitly creating an X structure calls __new__ and __init__, of course. + x = X() + self.assertEqual((x.a, x.b), (9, 12)) + self.assertEqual(x.new_was_called, True) + + y.x = x + self.assertEqual((y.x.a, y.x.b), (9, 12)) + self.assertEqual(y.x.new_was_called, False) + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_internals.py b/third_party/stdlib/ctypes/test/test_internals.py new file mode 100644 index 00000000..2e5b1fed --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_internals.py @@ -0,0 +1,103 @@ +# This tests the internal _objects attribute +import unittest +from ctypes import * +from sys import getrefcount as grc + +# XXX This test must be reviewed for correctness!!! + +""" +ctypes' types are container types. + +They have an internal memory block, which only consists of some bytes, +but it has to keep references to other objects as well. This is not +really needed for trivial C types like int or char, but it is important +for aggregate types like strings or pointers in particular. + +What about pointers? + +""" + +class ObjectsTestCase(unittest.TestCase): + def assertSame(self, a, b): + self.assertEqual(id(a), id(b)) + + def test_ints(self): + i = 42000123 + refcnt = grc(i) + ci = c_int(i) + self.assertEqual(refcnt, grc(i)) + self.assertEqual(ci._objects, None) + + def test_c_char_p(self): + s = "Hello, World" + refcnt = grc(s) + cs = c_char_p(s) + self.assertEqual(refcnt + 1, grc(s)) + self.assertSame(cs._objects, s) + + def test_simple_struct(self): + class X(Structure): + _fields_ = [("a", c_int), ("b", c_int)] + + a = 421234 + b = 421235 + x = X() + self.assertEqual(x._objects, None) + x.a = a + x.b = b + self.assertEqual(x._objects, None) + + def test_embedded_structs(self): + class X(Structure): + _fields_ = [("a", c_int), ("b", c_int)] + + class Y(Structure): + _fields_ = [("x", X), ("y", X)] + + y = Y() + self.assertEqual(y._objects, None) + + x1, x2 = X(), X() + y.x, y.y = x1, x2 + self.assertEqual(y._objects, {"0": {}, "1": {}}) + x1.a, x2.b = 42, 93 + self.assertEqual(y._objects, {"0": {}, "1": {}}) + + def test_xxx(self): + class X(Structure): + _fields_ = [("a", c_char_p), ("b", c_char_p)] + + class Y(Structure): + _fields_ = [("x", X), ("y", X)] + + s1 = "Hello, World" + s2 = "Hallo, Welt" + + x = X() + x.a = s1 + x.b = s2 + self.assertEqual(x._objects, {"0": s1, "1": s2}) + + y = Y() + y.x = x + self.assertEqual(y._objects, {"0": {"0": s1, "1": s2}}) +## x = y.x +## del y +## print x._b_base_._objects + + def test_ptr_struct(self): + class X(Structure): + _fields_ = [("data", POINTER(c_int))] + + A = c_int*4 + a = A(11, 22, 33, 44) + self.assertEqual(a._objects, None) + + x = X() + x.data = a +##XXX print x._objects +##XXX print x.data[0] +##XXX print x.data._objects + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_keeprefs.py b/third_party/stdlib/ctypes/test/test_keeprefs.py new file mode 100644 index 00000000..b2a50ab7 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_keeprefs.py @@ -0,0 +1,153 @@ +from ctypes import * +import unittest + +class SimpleTestCase(unittest.TestCase): + def test_cint(self): + x = c_int() + self.assertEqual(x._objects, None) + x.value = 42 + self.assertEqual(x._objects, None) + x = c_int(99) + self.assertEqual(x._objects, None) + + def test_ccharp(self): + x = c_char_p() + self.assertEqual(x._objects, None) + x.value = "abc" + self.assertEqual(x._objects, "abc") + x = c_char_p("spam") + self.assertEqual(x._objects, "spam") + +class StructureTestCase(unittest.TestCase): + def test_cint_struct(self): + class X(Structure): + _fields_ = [("a", c_int), + ("b", c_int)] + + x = X() + self.assertEqual(x._objects, None) + x.a = 42 + x.b = 99 + self.assertEqual(x._objects, None) + + def test_ccharp_struct(self): + class X(Structure): + _fields_ = [("a", c_char_p), + ("b", c_char_p)] + x = X() + self.assertEqual(x._objects, None) + + x.a = "spam" + x.b = "foo" + self.assertEqual(x._objects, {"0": "spam", "1": "foo"}) + + def test_struct_struct(self): + class POINT(Structure): + _fields_ = [("x", c_int), ("y", c_int)] + class RECT(Structure): + _fields_ = [("ul", POINT), ("lr", POINT)] + + r = RECT() + r.ul.x = 0 + r.ul.y = 1 + r.lr.x = 2 + r.lr.y = 3 + self.assertEqual(r._objects, None) + + r = RECT() + pt = POINT(1, 2) + r.ul = pt + self.assertEqual(r._objects, {'0': {}}) + r.ul.x = 22 + r.ul.y = 44 + self.assertEqual(r._objects, {'0': {}}) + r.lr = POINT() + self.assertEqual(r._objects, {'0': {}, '1': {}}) + +class ArrayTestCase(unittest.TestCase): + def test_cint_array(self): + INTARR = c_int * 3 + + ia = INTARR() + self.assertEqual(ia._objects, None) + ia[0] = 1 + ia[1] = 2 + ia[2] = 3 + self.assertEqual(ia._objects, None) + + class X(Structure): + _fields_ = [("x", c_int), + ("a", INTARR)] + + x = X() + x.x = 1000 + x.a[0] = 42 + x.a[1] = 96 + self.assertEqual(x._objects, None) + x.a = ia + self.assertEqual(x._objects, {'1': {}}) + +class PointerTestCase(unittest.TestCase): + def test_p_cint(self): + i = c_int(42) + x = pointer(i) + self.assertEqual(x._objects, {'1': i}) + +class DeletePointerTestCase(unittest.TestCase): + @unittest.skip('test disabled') + def test_X(self): + class X(Structure): + _fields_ = [("p", POINTER(c_char_p))] + x = X() + i = c_char_p("abc def") + from sys import getrefcount as grc + print "2?", grc(i) + x.p = pointer(i) + print "3?", grc(i) + for i in range(320): + c_int(99) + x.p[0] + print x.p[0] +## del x +## print "2?", grc(i) +## del i + import gc + gc.collect() + for i in range(320): + c_int(99) + x.p[0] + print x.p[0] + print x.p.contents +## print x._objects + + x.p[0] = "spam spam" +## print x.p[0] + print "+" * 42 + print x._objects + +class PointerToStructure(unittest.TestCase): + def test(self): + class POINT(Structure): + _fields_ = [("x", c_int), ("y", c_int)] + class RECT(Structure): + _fields_ = [("a", POINTER(POINT)), + ("b", POINTER(POINT))] + r = RECT() + p1 = POINT(1, 2) + + r.a = pointer(p1) + r.b = pointer(p1) +## from pprint import pprint as pp +## pp(p1._objects) +## pp(r._objects) + + r.a[0].x = 42 + r.a[0].y = 99 + + # to avoid leaking when tests are run several times + # clean up the types left in the cache. + from ctypes import _pointer_type_cache + del _pointer_type_cache[POINT] + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_libc.py b/third_party/stdlib/ctypes/test/test_libc.py new file mode 100644 index 00000000..3dc463fc --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_libc.py @@ -0,0 +1,29 @@ +import unittest + +from ctypes import * +import _ctypes_test + +lib = CDLL(_ctypes_test.__file__) + +class LibTest(unittest.TestCase): + def test_sqrt(self): + lib.my_sqrt.argtypes = c_double, + lib.my_sqrt.restype = c_double + self.assertEqual(lib.my_sqrt(4.0), 2.0) + import math + self.assertEqual(lib.my_sqrt(2.0), math.sqrt(2.0)) + + def test_qsort(self): + comparefunc = CFUNCTYPE(c_int, POINTER(c_char), POINTER(c_char)) + lib.my_qsort.argtypes = c_void_p, c_size_t, c_size_t, comparefunc + lib.my_qsort.restype = None + + def sort(a, b): + return cmp(a[0], b[0]) + + chars = create_string_buffer("spam, spam, and spam") + lib.my_qsort(chars, len(chars)-1, sizeof(c_char), comparefunc(sort)) + self.assertEqual(chars.raw, " ,,aaaadmmmnpppsss\x00") + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_loading.py b/third_party/stdlib/ctypes/test/test_loading.py new file mode 100644 index 00000000..81a27e35 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_loading.py @@ -0,0 +1,114 @@ +from ctypes import * +import sys, unittest +import os +from ctypes.util import find_library +from ctypes.test import is_resource_enabled + +libc_name = None +if os.name == "nt": + libc_name = find_library("c") +elif os.name == "ce": + libc_name = "coredll" +elif sys.platform == "cygwin": + libc_name = "cygwin1.dll" +else: + libc_name = find_library("c") + +if is_resource_enabled("printing"): + print "libc_name is", libc_name + +class LoaderTest(unittest.TestCase): + + unknowndll = "xxrandomnamexx" + + @unittest.skipUnless(libc_name is not None, 'could not find libc') + def test_load(self): + CDLL(libc_name) + CDLL(os.path.basename(libc_name)) + self.assertRaises(OSError, CDLL, self.unknowndll) + + @unittest.skipUnless(libc_name is not None, 'could not find libc') + @unittest.skipUnless(libc_name is not None and + os.path.basename(libc_name) == "libc.so.6", + 'wrong libc path for test') + def test_load_version(self): + cdll.LoadLibrary("libc.so.6") + # linux uses version, libc 9 should not exist + self.assertRaises(OSError, cdll.LoadLibrary, "libc.so.9") + self.assertRaises(OSError, cdll.LoadLibrary, self.unknowndll) + + def test_find(self): + for name in ("c", "m"): + lib = find_library(name) + if lib: + cdll.LoadLibrary(lib) + CDLL(lib) + + @unittest.skipUnless(os.name in ("nt", "ce"), + 'test specific to Windows (NT/CE)') + def test_load_library(self): + self.assertIsNotNone(libc_name) + if is_resource_enabled("printing"): + print find_library("kernel32") + print find_library("user32") + + if os.name == "nt": + windll.kernel32.GetModuleHandleW + windll["kernel32"].GetModuleHandleW + windll.LoadLibrary("kernel32").GetModuleHandleW + WinDLL("kernel32").GetModuleHandleW + elif os.name == "ce": + windll.coredll.GetModuleHandleW + windll["coredll"].GetModuleHandleW + windll.LoadLibrary("coredll").GetModuleHandleW + WinDLL("coredll").GetModuleHandleW + + @unittest.skipUnless(os.name in ("nt", "ce"), + 'test specific to Windows (NT/CE)') + def test_load_ordinal_functions(self): + import _ctypes_test + dll = WinDLL(_ctypes_test.__file__) + # We load the same function both via ordinal and name + func_ord = dll[2] + func_name = dll.GetString + # addressof gets the address where the function pointer is stored + a_ord = addressof(func_ord) + a_name = addressof(func_name) + f_ord_addr = c_void_p.from_address(a_ord).value + f_name_addr = c_void_p.from_address(a_name).value + self.assertEqual(hex(f_ord_addr), hex(f_name_addr)) + + self.assertRaises(AttributeError, dll.__getitem__, 1234) + + @unittest.skipUnless(os.name == "nt", 'Windows-specific test') + def test_1703286_A(self): + from _ctypes import LoadLibrary, FreeLibrary + # On winXP 64-bit, advapi32 loads at an address that does + # NOT fit into a 32-bit integer. FreeLibrary must be able + # to accept this address. + + # These are tests for http://www.python.org/sf/1703286 + handle = LoadLibrary("advapi32") + FreeLibrary(handle) + + @unittest.skipUnless(os.name == "nt", 'Windows-specific test') + def test_1703286_B(self): + # Since on winXP 64-bit advapi32 loads like described + # above, the (arbitrarily selected) CloseEventLog function + # also has a high address. 'call_function' should accept + # addresses so large. + from _ctypes import call_function + advapi32 = windll.advapi32 + # Calling CloseEventLog with a NULL argument should fail, + # but the call should not segfault or so. + self.assertEqual(0, advapi32.CloseEventLog(None)) + windll.kernel32.GetProcAddress.argtypes = c_void_p, c_char_p + windll.kernel32.GetProcAddress.restype = c_void_p + proc = windll.kernel32.GetProcAddress(advapi32._handle, + "CloseEventLog") + self.assertTrue(proc) + # This is the real test: call the function via 'call_function' + self.assertEqual(0, call_function(proc, (None,))) + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_macholib.py b/third_party/stdlib/ctypes/test/test_macholib.py new file mode 100644 index 00000000..9779b2f3 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_macholib.py @@ -0,0 +1,65 @@ +import os +import sys +import unittest + +# Bob Ippolito: +""" +Ok.. the code to find the filename for __getattr__ should look +something like: + +import os +from macholib.dyld import dyld_find + +def find_lib(name): + possible = ['lib'+name+'.dylib', name+'.dylib', + name+'.framework/'+name] + for dylib in possible: + try: + return os.path.realpath(dyld_find(dylib)) + except ValueError: + pass + raise ValueError, "%s not found" % (name,) + +It'll have output like this: + + >>> find_lib('pthread') +'/usr/lib/libSystem.B.dylib' + >>> find_lib('z') +'/usr/lib/libz.1.dylib' + >>> find_lib('IOKit') +'/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit' + +-bob + +""" + +from ctypes.macholib.dyld import dyld_find + +def find_lib(name): + possible = ['lib'+name+'.dylib', name+'.dylib', name+'.framework/'+name] + for dylib in possible: + try: + return os.path.realpath(dyld_find(dylib)) + except ValueError: + pass + raise ValueError("%s not found" % (name,)) + +class MachOTest(unittest.TestCase): + @unittest.skipUnless(sys.platform == "darwin", 'OSX-specific test') + def test_find(self): + + self.assertEqual(find_lib('pthread'), + '/usr/lib/libSystem.B.dylib') + + result = find_lib('z') + # Issue #21093: dyld default search path includes $HOME/lib and + # /usr/local/lib before /usr/lib, which caused test failures if + # a local copy of libz exists in one of them. Now ignore the head + # of the path. + self.assertRegexpMatches(result, r".*/lib/libz\..*.*\.dylib") + + self.assertEqual(find_lib('IOKit'), + '/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit') + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_memfunctions.py b/third_party/stdlib/ctypes/test/test_memfunctions.py new file mode 100644 index 00000000..ba9eea6f --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_memfunctions.py @@ -0,0 +1,77 @@ +import sys +import unittest +from ctypes import * +from ctypes.test import need_symbol + +class MemFunctionsTest(unittest.TestCase): + @unittest.skip('test disabled') + def test_overflow(self): + # string_at and wstring_at must use the Python calling + # convention (which acquires the GIL and checks the Python + # error flag). Provoke an error and catch it; see also issue + # #3554: + self.assertRaises((OverflowError, MemoryError, SystemError), + lambda: wstring_at(u"foo", sys.maxint - 1)) + self.assertRaises((OverflowError, MemoryError, SystemError), + lambda: string_at("foo", sys.maxint - 1)) + + def test_memmove(self): + # large buffers apparently increase the chance that the memory + # is allocated in high address space. + a = create_string_buffer(1000000) + p = "Hello, World" + result = memmove(a, p, len(p)) + self.assertEqual(a.value, "Hello, World") + + self.assertEqual(string_at(result), "Hello, World") + self.assertEqual(string_at(result, 5), "Hello") + self.assertEqual(string_at(result, 16), "Hello, World\0\0\0\0") + self.assertEqual(string_at(result, 0), "") + + def test_memset(self): + a = create_string_buffer(1000000) + result = memset(a, ord('x'), 16) + self.assertEqual(a.value, "xxxxxxxxxxxxxxxx") + + self.assertEqual(string_at(result), "xxxxxxxxxxxxxxxx") + self.assertEqual(string_at(a), "xxxxxxxxxxxxxxxx") + self.assertEqual(string_at(a, 20), "xxxxxxxxxxxxxxxx\0\0\0\0") + + def test_cast(self): + a = (c_ubyte * 32)(*map(ord, "abcdef")) + self.assertEqual(cast(a, c_char_p).value, "abcdef") + self.assertEqual(cast(a, POINTER(c_byte))[:7], + [97, 98, 99, 100, 101, 102, 0]) + self.assertEqual(cast(a, POINTER(c_byte))[:7:], + [97, 98, 99, 100, 101, 102, 0]) + self.assertEqual(cast(a, POINTER(c_byte))[6:-1:-1], + [0, 102, 101, 100, 99, 98, 97]) + self.assertEqual(cast(a, POINTER(c_byte))[:7:2], + [97, 99, 101, 0]) + self.assertEqual(cast(a, POINTER(c_byte))[:7:7], + [97]) + + def test_string_at(self): + s = string_at("foo bar") + # XXX The following may be wrong, depending on how Python + # manages string instances + self.assertEqual(2, sys.getrefcount(s)) + self.assertTrue(s, "foo bar") + + self.assertEqual(string_at("foo bar", 8), "foo bar\0") + self.assertEqual(string_at("foo bar", 3), "foo") + + @need_symbol('create_unicode_buffer') + def test_wstring_at(self): + p = create_unicode_buffer("Hello, World") + a = create_unicode_buffer(1000000) + result = memmove(a, p, len(p) * sizeof(c_wchar)) + self.assertEqual(a.value, "Hello, World") + + self.assertEqual(wstring_at(a), "Hello, World") + self.assertEqual(wstring_at(a, 5), "Hello") + self.assertEqual(wstring_at(a, 16), "Hello, World\0\0\0\0") + self.assertEqual(wstring_at(a, 0), "") + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_numbers.py b/third_party/stdlib/ctypes/test/test_numbers.py new file mode 100644 index 00000000..d623465b --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_numbers.py @@ -0,0 +1,289 @@ +from ctypes import * +import unittest +import struct + +def valid_ranges(*types): + # given a sequence of numeric types, collect their _type_ + # attribute, which is a single format character compatible with + # the struct module, use the struct module to calculate the + # minimum and maximum value allowed for this format. + # Returns a list of (min, max) values. + result = [] + for t in types: + fmt = t._type_ + size = struct.calcsize(fmt) + a = struct.unpack(fmt, ("\x00"*32)[:size])[0] + b = struct.unpack(fmt, ("\xFF"*32)[:size])[0] + c = struct.unpack(fmt, ("\x7F"+"\x00"*32)[:size])[0] + d = struct.unpack(fmt, ("\x80"+"\xFF"*32)[:size])[0] + result.append((min(a, b, c, d), max(a, b, c, d))) + return result + +ArgType = type(byref(c_int(0))) + +unsigned_types = [c_ubyte, c_ushort, c_uint, c_ulong] +signed_types = [c_byte, c_short, c_int, c_long, c_longlong] + +bool_types = [] + +float_types = [c_double, c_float] + +try: + c_ulonglong + c_longlong +except NameError: + pass +else: + unsigned_types.append(c_ulonglong) + signed_types.append(c_longlong) + +try: + c_bool +except NameError: + pass +else: + bool_types.append(c_bool) + +unsigned_ranges = valid_ranges(*unsigned_types) +signed_ranges = valid_ranges(*signed_types) +bool_values = [True, False, 0, 1, -1, 5000, 'test', [], [1]] + +################################################################ + +class NumberTestCase(unittest.TestCase): + + def test_default_init(self): + # default values are set to zero + for t in signed_types + unsigned_types + float_types: + self.assertEqual(t().value, 0) + + def test_unsigned_values(self): + # the value given to the constructor is available + # as the 'value' attribute + for t, (l, h) in zip(unsigned_types, unsigned_ranges): + self.assertEqual(t(l).value, l) + self.assertEqual(t(h).value, h) + + def test_signed_values(self): + # see above + for t, (l, h) in zip(signed_types, signed_ranges): + self.assertEqual(t(l).value, l) + self.assertEqual(t(h).value, h) + + def test_bool_values(self): + from operator import truth + for t, v in zip(bool_types, bool_values): + self.assertEqual(t(v).value, truth(v)) + + def test_typeerror(self): + # Only numbers are allowed in the contructor, + # otherwise TypeError is raised + for t in signed_types + unsigned_types + float_types: + self.assertRaises(TypeError, t, "") + self.assertRaises(TypeError, t, None) + + @unittest.skip('test disabled') + def test_valid_ranges(self): + # invalid values of the correct type + # raise ValueError (not OverflowError) + for t, (l, h) in zip(unsigned_types, unsigned_ranges): + self.assertRaises(ValueError, t, l-1) + self.assertRaises(ValueError, t, h+1) + + def test_from_param(self): + # the from_param class method attribute always + # returns PyCArgObject instances + for t in signed_types + unsigned_types + float_types: + self.assertEqual(ArgType, type(t.from_param(0))) + + def test_byref(self): + # calling byref returns also a PyCArgObject instance + for t in signed_types + unsigned_types + float_types + bool_types: + parm = byref(t()) + self.assertEqual(ArgType, type(parm)) + + + def test_floats(self): + # c_float and c_double can be created from + # Python int, long and float + class FloatLike(object): + def __float__(self): + return 2.0 + f = FloatLike() + for t in float_types: + self.assertEqual(t(2.0).value, 2.0) + self.assertEqual(t(2).value, 2.0) + self.assertEqual(t(2L).value, 2.0) + self.assertEqual(t(f).value, 2.0) + + def test_integers(self): + class FloatLike(object): + def __float__(self): + return 2.0 + f = FloatLike() + class IntLike(object): + def __int__(self): + return 2 + i = IntLike() + # integers cannot be constructed from floats, + # but from integer-like objects + for t in signed_types + unsigned_types: + self.assertRaises(TypeError, t, 3.14) + self.assertRaises(TypeError, t, f) + self.assertEqual(t(i).value, 2) + + def test_sizes(self): + for t in signed_types + unsigned_types + float_types + bool_types: + try: + size = struct.calcsize(t._type_) + except struct.error: + continue + # sizeof of the type... + self.assertEqual(sizeof(t), size) + # and sizeof of an instance + self.assertEqual(sizeof(t()), size) + + def test_alignments(self): + for t in signed_types + unsigned_types + float_types: + code = t._type_ # the typecode + align = struct.calcsize("c%c" % code) - struct.calcsize(code) + + # alignment of the type... + self.assertEqual((code, alignment(t)), + (code, align)) + # and alignment of an instance + self.assertEqual((code, alignment(t())), + (code, align)) + + def test_int_from_address(self): + from array import array + for t in signed_types + unsigned_types: + # the array module doesn't support all format codes + # (no 'q' or 'Q') + try: + array(t._type_) + except ValueError: + continue + a = array(t._type_, [100]) + + # v now is an integer at an 'external' memory location + v = t.from_address(a.buffer_info()[0]) + self.assertEqual(v.value, a[0]) + self.assertEqual(type(v), t) + + # changing the value at the memory location changes v's value also + a[0] = 42 + self.assertEqual(v.value, a[0]) + + + def test_float_from_address(self): + from array import array + for t in float_types: + a = array(t._type_, [3.14]) + v = t.from_address(a.buffer_info()[0]) + self.assertEqual(v.value, a[0]) + self.assertIs(type(v), t) + a[0] = 2.3456e17 + self.assertEqual(v.value, a[0]) + self.assertIs(type(v), t) + + def test_char_from_address(self): + from ctypes import c_char + from array import array + + a = array('c', 'x') + v = c_char.from_address(a.buffer_info()[0]) + self.assertEqual(v.value, a[0]) + self.assertIs(type(v), c_char) + + a[0] = '?' + self.assertEqual(v.value, a[0]) + + # array does not support c_bool / 't' + @unittest.skip('test disabled') + def test_bool_from_address(self): + from ctypes import c_bool + from array import array + a = array(c_bool._type_, [True]) + v = t.from_address(a.buffer_info()[0]) + self.assertEqual(v.value, a[0]) + self.assertEqual(type(v) is t) + a[0] = False + self.assertEqual(v.value, a[0]) + self.assertEqual(type(v) is t) + + def test_init(self): + # c_int() can be initialized from Python's int, and c_int. + # Not from c_long or so, which seems strange, abc should + # probably be changed: + self.assertRaises(TypeError, c_int, c_long(42)) + + def test_float_overflow(self): + import sys + big_int = int(sys.float_info.max) * 2 + for t in float_types + [c_longdouble]: + self.assertRaises(OverflowError, t, big_int) + if (hasattr(t, "__ctype_be__")): + self.assertRaises(OverflowError, t.__ctype_be__, big_int) + if (hasattr(t, "__ctype_le__")): + self.assertRaises(OverflowError, t.__ctype_le__, big_int) + + @unittest.skip('test disabled') + def test_perf(self): + check_perf() + +from ctypes import _SimpleCData +class c_int_S(_SimpleCData): + _type_ = "i" + __slots__ = [] + +def run_test(rep, msg, func, arg=None): +## items = [None] * rep + items = range(rep) + from time import clock + if arg is not None: + start = clock() + for i in items: + func(arg); func(arg); func(arg); func(arg); func(arg) + stop = clock() + else: + start = clock() + for i in items: + func(); func(); func(); func(); func() + stop = clock() + print "%15s: %.2f us" % (msg, ((stop-start)*1e6/5/rep)) + +def check_perf(): + # Construct 5 objects + from ctypes import c_int + + REP = 200000 + + run_test(REP, "int()", int) + run_test(REP, "int(999)", int) + run_test(REP, "c_int()", c_int) + run_test(REP, "c_int(999)", c_int) + run_test(REP, "c_int_S()", c_int_S) + run_test(REP, "c_int_S(999)", c_int_S) + +# Python 2.3 -OO, win2k, P4 700 MHz: +# +# int(): 0.87 us +# int(999): 0.87 us +# c_int(): 3.35 us +# c_int(999): 3.34 us +# c_int_S(): 3.23 us +# c_int_S(999): 3.24 us + +# Python 2.2 -OO, win2k, P4 700 MHz: +# +# int(): 0.89 us +# int(999): 0.89 us +# c_int(): 9.99 us +# c_int(999): 10.02 us +# c_int_S(): 9.87 us +# c_int_S(999): 9.85 us + +if __name__ == '__main__': +## check_perf() + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_objects.py b/third_party/stdlib/ctypes/test/test_objects.py new file mode 100644 index 00000000..a7c52472 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_objects.py @@ -0,0 +1,67 @@ +r''' +This tests the '_objects' attribute of ctypes instances. '_objects' +holds references to objects that must be kept alive as long as the +ctypes instance, to make sure that the memory buffer is valid. + +WARNING: The '_objects' attribute is exposed ONLY for debugging ctypes itself, +it MUST NEVER BE MODIFIED! + +'_objects' is initialized to a dictionary on first use, before that it +is None. + +Here is an array of string pointers: + +>>> from ctypes import * +>>> array = (c_char_p * 5)() +>>> print array._objects +None +>>> + +The memory block stores pointers to strings, and the strings itself +assigned from Python must be kept. + +>>> array[4] = 'foo bar' +>>> array._objects +{'4': 'foo bar'} +>>> array[4] +'foo bar' +>>> + +It gets more complicated when the ctypes instance itself is contained +in a 'base' object. + +>>> class X(Structure): +... _fields_ = [("x", c_int), ("y", c_int), ("array", c_char_p * 5)] +... +>>> x = X() +>>> print x._objects +None +>>> + +The'array' attribute of the 'x' object shares part of the memory buffer +of 'x' ('_b_base_' is either None, or the root object owning the memory block): + +>>> print x.array._b_base_ # doctest: +ELLIPSIS + +>>> + +>>> x.array[0] = 'spam spam spam' +>>> x._objects +{'0:2': 'spam spam spam'} +>>> x.array._b_base_._objects +{'0:2': 'spam spam spam'} +>>> + +''' + +import unittest, doctest, sys + +import ctypes.test.test_objects + +class TestCase(unittest.TestCase): + def test(self): + failures, tests = doctest.testmod(ctypes.test.test_objects) + self.assertFalse(failures, 'doctests failed, see output above') + +if __name__ == '__main__': + doctest.testmod(ctypes.test.test_objects) diff --git a/third_party/stdlib/ctypes/test/test_parameters.py b/third_party/stdlib/ctypes/test/test_parameters.py new file mode 100644 index 00000000..192d9ed4 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_parameters.py @@ -0,0 +1,181 @@ +import unittest, sys +from ctypes.test import need_symbol + +class SimpleTypesTestCase(unittest.TestCase): + + def setUp(self): + import ctypes + try: + from _ctypes import set_conversion_mode + except ImportError: + pass + else: + self.prev_conv_mode = set_conversion_mode("ascii", "strict") + + def tearDown(self): + try: + from _ctypes import set_conversion_mode + except ImportError: + pass + else: + set_conversion_mode(*self.prev_conv_mode) + + + def test_subclasses(self): + from ctypes import c_void_p, c_char_p + # ctypes 0.9.5 and before did overwrite from_param in SimpleType_new + class CVOIDP(c_void_p): + def from_param(cls, value): + return value * 2 + from_param = classmethod(from_param) + + class CCHARP(c_char_p): + def from_param(cls, value): + return value * 4 + from_param = classmethod(from_param) + + self.assertEqual(CVOIDP.from_param("abc"), "abcabc") + self.assertEqual(CCHARP.from_param("abc"), "abcabcabcabc") + + @need_symbol('c_wchar_p') + def test_subclasses_c_wchar_p(self): + from ctypes import c_wchar_p + + class CWCHARP(c_wchar_p): + def from_param(cls, value): + return value * 3 + from_param = classmethod(from_param) + + self.assertEqual(CWCHARP.from_param("abc"), "abcabcabc") + + # XXX Replace by c_char_p tests + def test_cstrings(self): + from ctypes import c_char_p, byref + + # c_char_p.from_param on a Python String packs the string + # into a cparam object + s = "123" + self.assertIs(c_char_p.from_param(s)._obj, s) + + # new in 0.9.1: convert (encode) unicode to ascii + self.assertEqual(c_char_p.from_param(u"123")._obj, "123") + self.assertRaises(UnicodeEncodeError, c_char_p.from_param, u"123\377") + + self.assertRaises(TypeError, c_char_p.from_param, 42) + + # calling c_char_p.from_param with a c_char_p instance + # returns the argument itself: + a = c_char_p("123") + self.assertIs(c_char_p.from_param(a), a) + + @need_symbol('c_wchar_p') + def test_cw_strings(self): + from ctypes import byref, c_wchar_p + s = u"123" + if sys.platform == "win32": + self.assertTrue(c_wchar_p.from_param(s)._obj is s) + self.assertRaises(TypeError, c_wchar_p.from_param, 42) + + # new in 0.9.1: convert (decode) ascii to unicode + self.assertEqual(c_wchar_p.from_param("123")._obj, u"123") + self.assertRaises(UnicodeDecodeError, c_wchar_p.from_param, "123\377") + + pa = c_wchar_p.from_param(c_wchar_p(u"123")) + self.assertEqual(type(pa), c_wchar_p) + + def test_int_pointers(self): + from ctypes import c_short, c_uint, c_int, c_long, POINTER, pointer + LPINT = POINTER(c_int) + +## p = pointer(c_int(42)) +## x = LPINT.from_param(p) + x = LPINT.from_param(pointer(c_int(42))) + self.assertEqual(x.contents.value, 42) + self.assertEqual(LPINT(c_int(42)).contents.value, 42) + + self.assertEqual(LPINT.from_param(None), None) + + if c_int != c_long: + self.assertRaises(TypeError, LPINT.from_param, pointer(c_long(42))) + self.assertRaises(TypeError, LPINT.from_param, pointer(c_uint(42))) + self.assertRaises(TypeError, LPINT.from_param, pointer(c_short(42))) + + def test_byref_pointer(self): + # The from_param class method of POINTER(typ) classes accepts what is + # returned by byref(obj), it type(obj) == typ + from ctypes import c_short, c_uint, c_int, c_long, pointer, POINTER, byref + LPINT = POINTER(c_int) + + LPINT.from_param(byref(c_int(42))) + + self.assertRaises(TypeError, LPINT.from_param, byref(c_short(22))) + if c_int != c_long: + self.assertRaises(TypeError, LPINT.from_param, byref(c_long(22))) + self.assertRaises(TypeError, LPINT.from_param, byref(c_uint(22))) + + def test_byref_pointerpointer(self): + # See above + from ctypes import c_short, c_uint, c_int, c_long, pointer, POINTER, byref + + LPLPINT = POINTER(POINTER(c_int)) + LPLPINT.from_param(byref(pointer(c_int(42)))) + + self.assertRaises(TypeError, LPLPINT.from_param, byref(pointer(c_short(22)))) + if c_int != c_long: + self.assertRaises(TypeError, LPLPINT.from_param, byref(pointer(c_long(22)))) + self.assertRaises(TypeError, LPLPINT.from_param, byref(pointer(c_uint(22)))) + + def test_array_pointers(self): + from ctypes import c_short, c_uint, c_int, c_long, POINTER + INTARRAY = c_int * 3 + ia = INTARRAY() + self.assertEqual(len(ia), 3) + self.assertEqual([ia[i] for i in range(3)], [0, 0, 0]) + + # Pointers are only compatible with arrays containing items of + # the same type! + LPINT = POINTER(c_int) + LPINT.from_param((c_int*3)()) + self.assertRaises(TypeError, LPINT.from_param, c_short*3) + self.assertRaises(TypeError, LPINT.from_param, c_long*3) + self.assertRaises(TypeError, LPINT.from_param, c_uint*3) + + def test_noctypes_argtype(self): + import _ctypes_test + from ctypes import CDLL, c_void_p, ArgumentError + + func = CDLL(_ctypes_test.__file__)._testfunc_p_p + func.restype = c_void_p + # TypeError: has no from_param method + self.assertRaises(TypeError, setattr, func, "argtypes", (object,)) + + class Adapter(object): + def from_param(cls, obj): + return None + + func.argtypes = (Adapter(),) + self.assertEqual(func(None), None) + self.assertEqual(func(object()), None) + + class Adapter(object): + def from_param(cls, obj): + return obj + + func.argtypes = (Adapter(),) + # don't know how to convert parameter 1 + self.assertRaises(ArgumentError, func, object()) + self.assertEqual(func(c_void_p(42)), 42) + + class Adapter(object): + def from_param(cls, obj): + raise ValueError(obj) + + func.argtypes = (Adapter(),) + # ArgumentError: argument 1: ValueError: 99 + self.assertRaises(ArgumentError, func, 99) + + +################################################################ + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_pep3118.py b/third_party/stdlib/ctypes/test/test_pep3118.py new file mode 100644 index 00000000..3e007e1b --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_pep3118.py @@ -0,0 +1,203 @@ +import unittest +from ctypes import * +import re, sys + +if sys.byteorder == "little": + THIS_ENDIAN = "<" + OTHER_ENDIAN = ">" +else: + THIS_ENDIAN = ">" + OTHER_ENDIAN = "<" + +def normalize(format): + # Remove current endian specifier and white space from a format + # string + if format is None: + return "" + format = format.replace(OTHER_ENDIAN, THIS_ENDIAN) + return re.sub(r"\s", "", format) + +class Test(unittest.TestCase): + + def test_native_types(self): + for tp, fmt, shape, itemtp in native_types: + ob = tp() + v = memoryview(ob) + try: + self.assertEqual(normalize(v.format), normalize(fmt)) + if shape is not None: + self.assertEqual(len(v), shape[0]) + else: + self.assertEqual(len(v) * sizeof(itemtp), sizeof(ob)) + self.assertEqual(v.itemsize, sizeof(itemtp)) + self.assertEqual(v.shape, shape) + # ctypes object always have a non-strided memory block + self.assertEqual(v.strides, None) + # they are always read/write + self.assertFalse(v.readonly) + + if v.shape: + n = 1 + for dim in v.shape: + n = n * dim + self.assertEqual(n * v.itemsize, len(v.tobytes())) + except: + # so that we can see the failing type + print(tp) + raise + + def test_endian_types(self): + for tp, fmt, shape, itemtp in endian_types: + ob = tp() + v = memoryview(ob) + try: + self.assertEqual(v.format, fmt) + if shape is not None: + self.assertEqual(len(v), shape[0]) + else: + self.assertEqual(len(v) * sizeof(itemtp), sizeof(ob)) + self.assertEqual(v.itemsize, sizeof(itemtp)) + self.assertEqual(v.shape, shape) + # ctypes object always have a non-strided memory block + self.assertEqual(v.strides, None) + # they are always read/write + self.assertFalse(v.readonly) + + if v.shape: + n = 1 + for dim in v.shape: + n = n * dim + self.assertEqual(n, len(v)) + except: + # so that we can see the failing type + print(tp) + raise + +# define some structure classes + +class Point(Structure): + _fields_ = [("x", c_long), ("y", c_long)] + +class PackedPoint(Structure): + _pack_ = 2 + _fields_ = [("x", c_long), ("y", c_long)] + +class Point2(Structure): + pass +Point2._fields_ = [("x", c_long), ("y", c_long)] + +class EmptyStruct(Structure): + _fields_ = [] + +class aUnion(Union): + _fields_ = [("a", c_int)] + +class StructWithArrays(Structure): + _fields_ = [("x", c_long * 3 * 2), ("y", Point * 4)] + + +class Incomplete(Structure): + pass + +class Complete(Structure): + pass +PComplete = POINTER(Complete) +Complete._fields_ = [("a", c_long)] + +################################################################ +# +# This table contains format strings as they look on little endian +# machines. The test replaces '<' with '>' on big endian machines. +# +native_types = [ + # type format shape calc itemsize + + ## simple types + + (c_char, "l:x:>l:y:}", None, BEPoint), + (LEPoint, "T{l:x:>l:y:}", None, POINTER(BEPoint)), + (POINTER(LEPoint), "&T{= 0: + return a + # View the bits in `a` as unsigned instead. + import struct + num_bits = struct.calcsize("P") * 8 # num bits in native machine address + a += 1L << num_bits + assert a >= 0 + return a + +def c_wbuffer(init): + n = len(init) + 1 + return (c_wchar * n)(*init) + +class CharPointersTestCase(unittest.TestCase): + + def setUp(self): + func = testdll._testfunc_p_p + func.restype = c_long + func.argtypes = None + + def test_paramflags(self): + # function returns c_void_p result, + # and has a required parameter named 'input' + prototype = CFUNCTYPE(c_void_p, c_void_p) + func = prototype(("_testfunc_p_p", testdll), + ((1, "input"),)) + + try: + func() + except TypeError, details: + self.assertEqual(str(details), "required argument 'input' missing") + else: + self.fail("TypeError not raised") + + self.assertEqual(func(None), None) + self.assertEqual(func(input=None), None) + + + def test_int_pointer_arg(self): + func = testdll._testfunc_p_p + func.restype = c_long + self.assertEqual(0, func(0)) + + ci = c_int(0) + + func.argtypes = POINTER(c_int), + self.assertEqual(positive_address(addressof(ci)), + positive_address(func(byref(ci)))) + + func.argtypes = c_char_p, + self.assertRaises(ArgumentError, func, byref(ci)) + + func.argtypes = POINTER(c_short), + self.assertRaises(ArgumentError, func, byref(ci)) + + func.argtypes = POINTER(c_double), + self.assertRaises(ArgumentError, func, byref(ci)) + + def test_POINTER_c_char_arg(self): + func = testdll._testfunc_p_p + func.restype = c_char_p + func.argtypes = POINTER(c_char), + + self.assertEqual(None, func(None)) + self.assertEqual("123", func("123")) + self.assertEqual(None, func(c_char_p(None))) + self.assertEqual("123", func(c_char_p("123"))) + + self.assertEqual("123", func(c_buffer("123"))) + ca = c_char("a") + self.assertEqual("a", func(pointer(ca))[0]) + self.assertEqual("a", func(byref(ca))[0]) + + def test_c_char_p_arg(self): + func = testdll._testfunc_p_p + func.restype = c_char_p + func.argtypes = c_char_p, + + self.assertEqual(None, func(None)) + self.assertEqual("123", func("123")) + self.assertEqual(None, func(c_char_p(None))) + self.assertEqual("123", func(c_char_p("123"))) + + self.assertEqual("123", func(c_buffer("123"))) + ca = c_char("a") + self.assertEqual("a", func(pointer(ca))[0]) + self.assertEqual("a", func(byref(ca))[0]) + + def test_c_void_p_arg(self): + func = testdll._testfunc_p_p + func.restype = c_char_p + func.argtypes = c_void_p, + + self.assertEqual(None, func(None)) + self.assertEqual("123", func("123")) + self.assertEqual("123", func(c_char_p("123"))) + self.assertEqual(None, func(c_char_p(None))) + + self.assertEqual("123", func(c_buffer("123"))) + ca = c_char("a") + self.assertEqual("a", func(pointer(ca))[0]) + self.assertEqual("a", func(byref(ca))[0]) + + func(byref(c_int())) + func(pointer(c_int())) + func((c_int * 3)()) + + @need_symbol('c_wchar_p') + def test_c_void_p_arg_with_c_wchar_p(self): + func = testdll._testfunc_p_p + func.restype = c_wchar_p + func.argtypes = c_void_p, + + self.assertEqual(None, func(c_wchar_p(None))) + self.assertEqual(u"123", func(c_wchar_p(u"123"))) + + def test_instance(self): + func = testdll._testfunc_p_p + func.restype = c_void_p + + class X: + _as_parameter_ = None + + func.argtypes = c_void_p, + self.assertEqual(None, func(X())) + + func.argtypes = None + self.assertEqual(None, func(X())) + +@need_symbol('c_wchar') +class WCharPointersTestCase(unittest.TestCase): + + def setUp(self): + func = testdll._testfunc_p_p + func.restype = c_int + func.argtypes = None + + + def test_POINTER_c_wchar_arg(self): + func = testdll._testfunc_p_p + func.restype = c_wchar_p + func.argtypes = POINTER(c_wchar), + + self.assertEqual(None, func(None)) + self.assertEqual(u"123", func(u"123")) + self.assertEqual(None, func(c_wchar_p(None))) + self.assertEqual(u"123", func(c_wchar_p(u"123"))) + + self.assertEqual(u"123", func(c_wbuffer(u"123"))) + ca = c_wchar("a") + self.assertEqual(u"a", func(pointer(ca))[0]) + self.assertEqual(u"a", func(byref(ca))[0]) + + def test_c_wchar_p_arg(self): + func = testdll._testfunc_p_p + func.restype = c_wchar_p + func.argtypes = c_wchar_p, + + c_wchar_p.from_param(u"123") + + self.assertEqual(None, func(None)) + self.assertEqual("123", func(u"123")) + self.assertEqual(None, func(c_wchar_p(None))) + self.assertEqual("123", func(c_wchar_p("123"))) + + # XXX Currently, these raise TypeErrors, although they shouldn't: + self.assertEqual("123", func(c_wbuffer("123"))) + ca = c_wchar("a") + self.assertEqual("a", func(pointer(ca))[0]) + self.assertEqual("a", func(byref(ca))[0]) + +class ArrayTest(unittest.TestCase): + def test(self): + func = testdll._testfunc_ai8 + func.restype = POINTER(c_int) + func.argtypes = c_int * 8, + + func((c_int * 8)(1, 2, 3, 4, 5, 6, 7, 8)) + + # This did crash before: + + def func(): pass + CFUNCTYPE(None, c_int * 3)(func) + +################################################################ + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_python_api.py b/third_party/stdlib/ctypes/test/test_python_api.py new file mode 100644 index 00000000..bc2ffd0e --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_python_api.py @@ -0,0 +1,90 @@ +from ctypes import * +import unittest, sys +from ctypes.test import requires + +################################################################ +# This section should be moved into ctypes\__init__.py, when it's ready. + +from _ctypes import PyObj_FromPtr + +################################################################ + +from sys import getrefcount as grc +if sys.version_info > (2, 4): + c_py_ssize_t = c_size_t +else: + c_py_ssize_t = c_int + +class PythonAPITestCase(unittest.TestCase): + + def test_PyString_FromStringAndSize(self): + PyString_FromStringAndSize = pythonapi.PyString_FromStringAndSize + + PyString_FromStringAndSize.restype = py_object + PyString_FromStringAndSize.argtypes = c_char_p, c_py_ssize_t + + self.assertEqual(PyString_FromStringAndSize("abcdefghi", 3), "abc") + + def test_PyString_FromString(self): + pythonapi.PyString_FromString.restype = py_object + pythonapi.PyString_FromString.argtypes = (c_char_p,) + + s = "abc" + refcnt = grc(s) + pyob = pythonapi.PyString_FromString(s) + self.assertEqual(grc(s), refcnt) + self.assertEqual(s, pyob) + del pyob + self.assertEqual(grc(s), refcnt) + + # This test is unreliable, because it is possible that code in + # unittest changes the refcount of the '42' integer. So, it + # is disabled by default. + def test_PyInt_Long(self): + requires("refcount") + ref42 = grc(42) + pythonapi.PyInt_FromLong.restype = py_object + self.assertEqual(pythonapi.PyInt_FromLong(42), 42) + + self.assertEqual(grc(42), ref42) + + pythonapi.PyInt_AsLong.argtypes = (py_object,) + pythonapi.PyInt_AsLong.restype = c_long + + res = pythonapi.PyInt_AsLong(42) + self.assertEqual(grc(res), ref42 + 1) + del res + self.assertEqual(grc(42), ref42) + + def test_PyObj_FromPtr(self): + s = "abc def ghi jkl" + ref = grc(s) + # id(python-object) is the address + pyobj = PyObj_FromPtr(id(s)) + self.assertIs(s, pyobj) + + self.assertEqual(grc(s), ref + 1) + del pyobj + self.assertEqual(grc(s), ref) + + def test_PyOS_snprintf(self): + PyOS_snprintf = pythonapi.PyOS_snprintf + PyOS_snprintf.argtypes = POINTER(c_char), c_size_t, c_char_p + + buf = c_buffer(256) + PyOS_snprintf(buf, sizeof(buf), "Hello from %s", "ctypes") + self.assertEqual(buf.value, "Hello from ctypes") + + PyOS_snprintf(buf, sizeof(buf), "Hello from %s", "ctypes", 1, 2, 3) + self.assertEqual(buf.value, "Hello from ctypes") + + # not enough arguments + self.assertRaises(TypeError, PyOS_snprintf, buf) + + def test_pyobject_repr(self): + self.assertEqual(repr(py_object()), "py_object()") + self.assertEqual(repr(py_object(42)), "py_object(42)") + self.assertEqual(repr(py_object(object)), "py_object(%r)" % object) + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_random_things.py b/third_party/stdlib/ctypes/test/test_random_things.py new file mode 100644 index 00000000..d72b7c89 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_random_things.py @@ -0,0 +1,74 @@ +from ctypes import * +import unittest, sys + +def callback_func(arg): + 42 // arg + raise ValueError(arg) + +@unittest.skipUnless(sys.platform == "win32", 'Windows-specific test') +class call_function_TestCase(unittest.TestCase): + # _ctypes.call_function is deprecated and private, but used by + # Gary Bishp's readline module. If we have it, we must test it as well. + + def test(self): + from _ctypes import call_function + windll.kernel32.LoadLibraryA.restype = c_void_p + windll.kernel32.GetProcAddress.argtypes = c_void_p, c_char_p + windll.kernel32.GetProcAddress.restype = c_void_p + + hdll = windll.kernel32.LoadLibraryA("kernel32") + funcaddr = windll.kernel32.GetProcAddress(hdll, "GetModuleHandleA") + + self.assertEqual(call_function(funcaddr, (None,)), + windll.kernel32.GetModuleHandleA(None)) + +class CallbackTracbackTestCase(unittest.TestCase): + # When an exception is raised in a ctypes callback function, the C + # code prints a traceback. + # + # This test makes sure the exception types *and* the exception + # value is printed correctly. + # + # Changed in 0.9.3: No longer is '(in callback)' prepended to the + # error message - instead an additional frame for the C code is + # created, then a full traceback printed. When SystemExit is + # raised in a callback function, the interpreter exits. + + def capture_stderr(self, func, *args, **kw): + # helper - call function 'func', and return the captured stderr + import StringIO + old_stderr = sys.stderr + logger = sys.stderr = StringIO.StringIO() + try: + func(*args, **kw) + finally: + sys.stderr = old_stderr + return logger.getvalue() + + def test_ValueError(self): + cb = CFUNCTYPE(c_int, c_int)(callback_func) + out = self.capture_stderr(cb, 42) + self.assertEqual(out.splitlines()[-1], + "ValueError: 42") + + def test_IntegerDivisionError(self): + cb = CFUNCTYPE(c_int, c_int)(callback_func) + out = self.capture_stderr(cb, 0) + self.assertEqual(out.splitlines()[-1][:19], + "ZeroDivisionError: ") + + def test_FloatDivisionError(self): + cb = CFUNCTYPE(c_int, c_double)(callback_func) + out = self.capture_stderr(cb, 0.0) + self.assertEqual(out.splitlines()[-1][:19], + "ZeroDivisionError: ") + + def test_TypeErrorDivisionError(self): + cb = CFUNCTYPE(c_int, c_char_p)(callback_func) + out = self.capture_stderr(cb, "spam") + self.assertEqual(out.splitlines()[-1], + "TypeError: " + "unsupported operand type(s) for //: 'int' and 'str'") + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_refcounts.py b/third_party/stdlib/ctypes/test/test_refcounts.py new file mode 100644 index 00000000..20313469 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_refcounts.py @@ -0,0 +1,98 @@ +import unittest +import ctypes +import gc + +MyCallback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int) +OtherCallback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_ulonglong) + +import _ctypes_test +dll = ctypes.CDLL(_ctypes_test.__file__) + +class RefcountTestCase(unittest.TestCase): + + def test_1(self): + from sys import getrefcount as grc + + f = dll._testfunc_callback_i_if + f.restype = ctypes.c_int + f.argtypes = [ctypes.c_int, MyCallback] + + def callback(value): + #print "called back with", value + return value + + self.assertEqual(grc(callback), 2) + cb = MyCallback(callback) + + self.assertGreater(grc(callback), 2) + result = f(-10, cb) + self.assertEqual(result, -18) + cb = None + + gc.collect() + + self.assertEqual(grc(callback), 2) + + + def test_refcount(self): + from sys import getrefcount as grc + def func(*args): + pass + # this is the standard refcount for func + self.assertEqual(grc(func), 2) + + # the CFuncPtr instance holds at least one refcount on func: + f = OtherCallback(func) + self.assertGreater(grc(func), 2) + + # and may release it again + del f + self.assertGreaterEqual(grc(func), 2) + + # but now it must be gone + gc.collect() + self.assertEqual(grc(func), 2) + + class X(ctypes.Structure): + _fields_ = [("a", OtherCallback)] + x = X() + x.a = OtherCallback(func) + + # the CFuncPtr instance holds at least one refcount on func: + self.assertGreater(grc(func), 2) + + # and may release it again + del x + self.assertGreaterEqual(grc(func), 2) + + # and now it must be gone again + gc.collect() + self.assertEqual(grc(func), 2) + + f = OtherCallback(func) + + # the CFuncPtr instance holds at least one refcount on func: + self.assertGreater(grc(func), 2) + + # create a cycle + f.cycle = f + + del f + gc.collect() + self.assertEqual(grc(func), 2) + +class AnotherLeak(unittest.TestCase): + def test_callback(self): + import sys + + proto = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_int) + def func(a, b): + return a * b * 2 + f = proto(func) + + a = sys.getrefcount(ctypes.c_int) + f(1, 2) + self.assertEqual(sys.getrefcount(ctypes.c_int), a) + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_repr.py b/third_party/stdlib/ctypes/test/test_repr.py new file mode 100644 index 00000000..99cc5564 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_repr.py @@ -0,0 +1,29 @@ +from ctypes import * +import unittest + +subclasses = [] +for base in [c_byte, c_short, c_int, c_long, c_longlong, + c_ubyte, c_ushort, c_uint, c_ulong, c_ulonglong, + c_float, c_double, c_longdouble, c_bool]: + class X(base): + pass + subclasses.append(X) + +class X(c_char): + pass + +# This test checks if the __repr__ is correct for subclasses of simple types + +class ReprTest(unittest.TestCase): + def test_numbers(self): + for typ in subclasses: + base = typ.__bases__[0] + self.assertTrue(repr(base(42)).startswith(base.__name__)) + self.assertEqual(": " + "expected string or Unicode object, int found") + else: + # Compatibility no longer strictly required + self.assertEqual(msg, + "(Phone) exceptions.TypeError: " + "expected string or Unicode object, int found") + + cls, msg = self.get_except(Person, "Someone", ("a", "b", "c")) + self.assertEqual(cls, RuntimeError) + if issubclass(Exception, object): + self.assertEqual(msg, + "(Phone) : too many initializers") + else: + self.assertEqual(msg, "(Phone) exceptions.TypeError: too many initializers") + + def test_huge_field_name(self): + # issue12881: segfault with large structure field names + def create_class(length): + class S(Structure): + _fields_ = [('x' * length, c_int)] + + for length in [10 ** i for i in range(0, 8)]: + try: + create_class(length) + except MemoryError: + # MemoryErrors are OK, we just don't want to segfault + pass + + def get_except(self, func, *args): + try: + func(*args) + except Exception, detail: + return detail.__class__, str(detail) + + @unittest.skip('test disabled') + def test_subclass_creation(self): + meta = type(Structure) + # same as 'class X(Structure): pass' + # fails, since we need either a _fields_ or a _abstract_ attribute + cls, msg = self.get_except(meta, "X", (Structure,), {}) + self.assertEqual((cls, msg), + (AttributeError, "class must define a '_fields_' attribute")) + + def test_abstract_class(self): + class X(Structure): + _abstract_ = "something" + # try 'X()' + cls, msg = self.get_except(eval, "X()", locals()) + self.assertEqual((cls, msg), (TypeError, "abstract class")) + + def test_methods(self): +## class X(Structure): +## _fields_ = [] + + self.assertIn("in_dll", dir(type(Structure))) + self.assertIn("from_address", dir(type(Structure))) + self.assertIn("in_dll", dir(type(Structure))) + + def test_positional_args(self): + # see also http://bugs.python.org/issue5042 + class W(Structure): + _fields_ = [("a", c_int), ("b", c_int)] + class X(W): + _fields_ = [("c", c_int)] + class Y(X): + pass + class Z(Y): + _fields_ = [("d", c_int), ("e", c_int), ("f", c_int)] + + z = Z(1, 2, 3, 4, 5, 6) + self.assertEqual((z.a, z.b, z.c, z.d, z.e, z.f), + (1, 2, 3, 4, 5, 6)) + z = Z(1) + self.assertEqual((z.a, z.b, z.c, z.d, z.e, z.f), + (1, 0, 0, 0, 0, 0)) + self.assertRaises(TypeError, lambda: Z(1, 2, 3, 4, 5, 6, 7)) + +class PointerMemberTestCase(unittest.TestCase): + + def test(self): + # a Structure with a POINTER field + class S(Structure): + _fields_ = [("array", POINTER(c_int))] + + s = S() + # We can assign arrays of the correct type + s.array = (c_int * 3)(1, 2, 3) + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [1, 2, 3]) + + # The following are bugs, but are included here because the unittests + # also describe the current behaviour. + # + # This fails with SystemError: bad arg to internal function + # or with IndexError (with a patch I have) + + s.array[0] = 42 + + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [42, 2, 3]) + + s.array[0] = 1 + +## s.array[1] = 42 + + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [1, 2, 3]) + + def test_none_to_pointer_fields(self): + class S(Structure): + _fields_ = [("x", c_int), + ("p", POINTER(c_int))] + + s = S() + s.x = 12345678 + s.p = None + self.assertEqual(s.x, 12345678) + +class TestRecursiveStructure(unittest.TestCase): + def test_contains_itself(self): + class Recursive(Structure): + pass + + try: + Recursive._fields_ = [("next", Recursive)] + except AttributeError, details: + self.assertIn("Structure or union cannot contain itself", + str(details)) + else: + self.fail("Structure or union cannot contain itself") + + + def test_vice_versa(self): + class First(Structure): + pass + class Second(Structure): + pass + + First._fields_ = [("second", Second)] + + try: + Second._fields_ = [("first", First)] + except AttributeError, details: + self.assertIn("_fields_ is final", str(details)) + else: + self.fail("AttributeError not raised") + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_unaligned_structures.py b/third_party/stdlib/ctypes/test/test_unaligned_structures.py new file mode 100644 index 00000000..bcacfc81 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_unaligned_structures.py @@ -0,0 +1,45 @@ +import sys, unittest +from ctypes import * + +structures = [] +byteswapped_structures = [] + + +if sys.byteorder == "little": + SwappedStructure = BigEndianStructure +else: + SwappedStructure = LittleEndianStructure + +for typ in [c_short, c_int, c_long, c_longlong, + c_float, c_double, + c_ushort, c_uint, c_ulong, c_ulonglong]: + class X(Structure): + _pack_ = 1 + _fields_ = [("pad", c_byte), + ("value", typ)] + class Y(SwappedStructure): + _pack_ = 1 + _fields_ = [("pad", c_byte), + ("value", typ)] + structures.append(X) + byteswapped_structures.append(Y) + +class TestStructures(unittest.TestCase): + def test_native(self): + for typ in structures: +## print typ.value + self.assertEqual(typ.value.offset, 1) + o = typ() + o.value = 4 + self.assertEqual(o.value, 4) + + def test_swapped(self): + for typ in byteswapped_structures: +## print >> sys.stderr, typ.value + self.assertEqual(typ.value.offset, 1) + o = typ() + o.value = 4 + self.assertEqual(o.value, 4) + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_unicode.py b/third_party/stdlib/ctypes/test/test_unicode.py new file mode 100644 index 00000000..1da5a25f --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_unicode.py @@ -0,0 +1,138 @@ +# coding: latin-1 +import unittest +import ctypes +from ctypes.test import need_symbol +import _ctypes_test + +@need_symbol('c_wchar') +class UnicodeTestCase(unittest.TestCase): + @classmethod + def setUpClass(cls): + dll = ctypes.CDLL(_ctypes_test.__file__) + cls.wcslen = dll.my_wcslen + cls.wcslen.argtypes = [ctypes.c_wchar_p] + def setUp(self): + self.prev_conv_mode = ctypes.set_conversion_mode("ascii", "strict") + + def tearDown(self): + ctypes.set_conversion_mode(*self.prev_conv_mode) + + def test_ascii_strict(self): + wcslen = self.wcslen + ctypes.set_conversion_mode("ascii", "strict") + # no conversions take place with unicode arguments + self.assertEqual(wcslen(u"abc"), 3) + self.assertEqual(wcslen(u"ab\u2070"), 3) + # string args are converted + self.assertEqual(wcslen("abc"), 3) + self.assertRaises(ctypes.ArgumentError, wcslen, "ab�") + + def test_ascii_replace(self): + wcslen = self.wcslen + ctypes.set_conversion_mode("ascii", "replace") + self.assertEqual(wcslen(u"abc"), 3) + self.assertEqual(wcslen(u"ab\u2070"), 3) + self.assertEqual(wcslen("abc"), 3) + self.assertEqual(wcslen("ab�"), 3) + + def test_ascii_ignore(self): + wcslen = self.wcslen + ctypes.set_conversion_mode("ascii", "ignore") + self.assertEqual(wcslen(u"abc"), 3) + self.assertEqual(wcslen(u"ab\u2070"), 3) + # ignore error mode skips non-ascii characters + self.assertEqual(wcslen("abc"), 3) + self.assertEqual(wcslen("����"), 0) + + def test_latin1_strict(self): + wcslen = self.wcslen + ctypes.set_conversion_mode("latin-1", "strict") + self.assertEqual(wcslen(u"abc"), 3) + self.assertEqual(wcslen(u"ab\u2070"), 3) + self.assertEqual(wcslen("abc"), 3) + self.assertEqual(wcslen("����"), 4) + + def test_buffers(self): + ctypes.set_conversion_mode("ascii", "strict") + buf = ctypes.create_unicode_buffer("abc") + self.assertEqual(len(buf), 3+1) + + ctypes.set_conversion_mode("ascii", "replace") + buf = ctypes.create_unicode_buffer("ab���") + self.assertEqual(buf[:], u"ab\uFFFD\uFFFD\uFFFD\0") + self.assertEqual(buf[::], u"ab\uFFFD\uFFFD\uFFFD\0") + self.assertEqual(buf[::-1], u"\0\uFFFD\uFFFD\uFFFDba") + self.assertEqual(buf[::2], u"a\uFFFD\uFFFD") + self.assertEqual(buf[6:5:-1], u"") + + ctypes.set_conversion_mode("ascii", "ignore") + buf = ctypes.create_unicode_buffer("ab���") + # is that correct? not sure. But with 'ignore', you get what you pay for.. + self.assertEqual(buf[:], u"ab\0\0\0\0") + self.assertEqual(buf[::], u"ab\0\0\0\0") + self.assertEqual(buf[::-1], u"\0\0\0\0ba") + self.assertEqual(buf[::2], u"a\0\0") + self.assertEqual(buf[6:5:-1], u"") + +@need_symbol('c_wchar') +class StringTestCase(UnicodeTestCase): + @classmethod + def setUpClass(cls): + super(StringTestCase, cls).setUpClass() + cls.func = ctypes.CDLL(_ctypes_test.__file__)._testfunc_p_p + + def setUp(self): + func = self.func + self.prev_conv_mode = ctypes.set_conversion_mode("ascii", "strict") + func.argtypes = [ctypes.c_char_p] + func.restype = ctypes.c_char_p + + def tearDown(self): + func = self.func + ctypes.set_conversion_mode(*self.prev_conv_mode) + func.argtypes = None + func.restype = ctypes.c_int + + def test_ascii_replace(self): + func = self.func + ctypes.set_conversion_mode("ascii", "strict") + self.assertEqual(func("abc"), "abc") + self.assertEqual(func(u"abc"), "abc") + self.assertRaises(ctypes.ArgumentError, func, u"ab�") + + def test_ascii_ignore(self): + func = self.func + ctypes.set_conversion_mode("ascii", "ignore") + self.assertEqual(func("abc"), "abc") + self.assertEqual(func(u"abc"), "abc") + self.assertEqual(func(u"����"), "") + + def test_ascii_replace(self): + func = self.func + ctypes.set_conversion_mode("ascii", "replace") + self.assertEqual(func("abc"), "abc") + self.assertEqual(func(u"abc"), "abc") + self.assertEqual(func(u"����"), "????") + + def test_buffers(self): + ctypes.set_conversion_mode("ascii", "strict") + buf = ctypes.create_string_buffer(u"abc") + self.assertEqual(len(buf), 3+1) + + ctypes.set_conversion_mode("ascii", "replace") + buf = ctypes.create_string_buffer(u"ab���") + self.assertEqual(buf[:], "ab???\0") + self.assertEqual(buf[::], "ab???\0") + self.assertEqual(buf[::-1], "\0???ba") + self.assertEqual(buf[::2], "a??") + self.assertEqual(buf[6:5:-1], "") + + ctypes.set_conversion_mode("ascii", "ignore") + buf = ctypes.create_string_buffer(u"ab���") + # is that correct? not sure. But with 'ignore', you get what you pay for.. + self.assertEqual(buf[:], "ab\0\0\0\0") + self.assertEqual(buf[::], "ab\0\0\0\0") + self.assertEqual(buf[::-1], "\0\0\0\0ba") + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_values.py b/third_party/stdlib/ctypes/test/test_values.py new file mode 100644 index 00000000..fe7dcf05 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_values.py @@ -0,0 +1,83 @@ +""" +A testcase which accesses *values* in a dll. +""" + +import unittest +import sys +from ctypes import * + +import _ctypes_test + +class ValuesTestCase(unittest.TestCase): + + def test_an_integer(self): + ctdll = CDLL(_ctypes_test.__file__) + an_integer = c_int.in_dll(ctdll, "an_integer") + x = an_integer.value + self.assertEqual(x, ctdll.get_an_integer()) + an_integer.value *= 2 + self.assertEqual(x*2, ctdll.get_an_integer()) + + def test_undefined(self): + ctdll = CDLL(_ctypes_test.__file__) + self.assertRaises(ValueError, c_int.in_dll, ctdll, "Undefined_Symbol") + +class PythonValuesTestCase(unittest.TestCase): + """This test only works when python itself is a dll/shared library""" + + def test_optimizeflag(self): + # This test accesses the Py_OptimizeFlag intger, which is + # exported by the Python dll. + + # It's value is set depending on the -O and -OO flags: + # if not given, it is 0 and __debug__ is 1. + # If -O is given, the flag is 1, for -OO it is 2. + # docstrings are also removed in the latter case. + opt = c_int.in_dll(pythonapi, "Py_OptimizeFlag").value + if __debug__: + self.assertEqual(opt, 0) + elif ValuesTestCase.__doc__ is not None: + self.assertEqual(opt, 1) + else: + self.assertEqual(opt, 2) + + def test_frozentable(self): + # Python exports a PyImport_FrozenModules symbol. This is a + # pointer to an array of struct _frozen entries. The end of the + # array is marked by an entry containing a NULL name and zero + # size. + + # In standard Python, this table contains a __hello__ + # module, and a __phello__ package containing a spam + # module. + class struct_frozen(Structure): + _fields_ = [("name", c_char_p), + ("code", POINTER(c_ubyte)), + ("size", c_int)] + FrozenTable = POINTER(struct_frozen) + + ft = FrozenTable.in_dll(pythonapi, "PyImport_FrozenModules") + # ft is a pointer to the struct_frozen entries: + items = [] + for entry in ft: + # This is dangerous. We *can* iterate over a pointer, but + # the loop will not terminate (maybe with an access + # violation;-) because the pointer instance has no size. + if entry.name is None: + break + items.append((entry.name, entry.size)) + + expected = [("__hello__", 104), + ("__phello__", -104), + ("__phello__.spam", 104)] + self.assertEqual(items, expected) + + from ctypes import _pointer_type_cache + del _pointer_type_cache[struct_frozen] + + def test_undefined(self): + self.assertRaises(ValueError, c_int.in_dll, pythonapi, + "Undefined_Symbol") + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_varsize_struct.py b/third_party/stdlib/ctypes/test/test_varsize_struct.py new file mode 100644 index 00000000..f409500f --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_varsize_struct.py @@ -0,0 +1,50 @@ +from ctypes import * +import unittest + +class VarSizeTest(unittest.TestCase): + def test_resize(self): + class X(Structure): + _fields_ = [("item", c_int), + ("array", c_int * 1)] + + self.assertEqual(sizeof(X), sizeof(c_int) * 2) + x = X() + x.item = 42 + x.array[0] = 100 + self.assertEqual(sizeof(x), sizeof(c_int) * 2) + + # make room for one additional item + new_size = sizeof(X) + sizeof(c_int) * 1 + resize(x, new_size) + self.assertEqual(sizeof(x), new_size) + self.assertEqual((x.item, x.array[0]), (42, 100)) + + # make room for 10 additional items + new_size = sizeof(X) + sizeof(c_int) * 9 + resize(x, new_size) + self.assertEqual(sizeof(x), new_size) + self.assertEqual((x.item, x.array[0]), (42, 100)) + + # make room for one additional item + new_size = sizeof(X) + sizeof(c_int) * 1 + resize(x, new_size) + self.assertEqual(sizeof(x), new_size) + self.assertEqual((x.item, x.array[0]), (42, 100)) + + def test_array_invalid_length(self): + # cannot create arrays with non-positive size + self.assertRaises(ValueError, lambda: c_int * -1) + self.assertRaises(ValueError, lambda: c_int * -3) + + def test_zerosized_array(self): + array = (c_int * 0)() + # accessing elements of zero-sized arrays raise IndexError + self.assertRaises(IndexError, array.__setitem__, 0, None) + self.assertRaises(IndexError, array.__getitem__, 0) + self.assertRaises(IndexError, array.__setitem__, 1, None) + self.assertRaises(IndexError, array.__getitem__, 1) + self.assertRaises(IndexError, array.__setitem__, -1, None) + self.assertRaises(IndexError, array.__getitem__, -1) + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_win32.py b/third_party/stdlib/ctypes/test/test_win32.py new file mode 100644 index 00000000..d22e139a --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_win32.py @@ -0,0 +1,122 @@ +# Windows specific tests + +from ctypes import * +from ctypes.test import requires +import unittest, sys +from test import test_support as support + +import _ctypes_test + +# Only windows 32-bit has different calling conventions. +@unittest.skipUnless(sys.platform == "win32", 'Windows-specific test') +@unittest.skipUnless(sizeof(c_void_p) == sizeof(c_int), + "sizeof c_void_p and c_int differ") +class WindowsTestCase(unittest.TestCase): + def test_callconv_1(self): + # Testing stdcall function + + IsWindow = windll.user32.IsWindow + # ValueError: Procedure probably called with not enough arguments + # (4 bytes missing) + self.assertRaises(ValueError, IsWindow) + + # This one should succeed... + self.assertEqual(0, IsWindow(0)) + + # ValueError: Procedure probably called with too many arguments + # (8 bytes in excess) + self.assertRaises(ValueError, IsWindow, 0, 0, 0) + + def test_callconv_2(self): + # Calling stdcall function as cdecl + + IsWindow = cdll.user32.IsWindow + + # ValueError: Procedure called with not enough arguments + # (4 bytes missing) or wrong calling convention + self.assertRaises(ValueError, IsWindow, None) + +@unittest.skipUnless(sys.platform == "win32", 'Windows-specific test') +class FunctionCallTestCase(unittest.TestCase): + @unittest.skipUnless('MSC' in sys.version, "SEH only supported by MSC") + @unittest.skipIf(sys.executable.endswith('_d.exe'), + "SEH not enabled in debug builds") + def test_SEH(self): + requires("SEH") + # Call functions with invalid arguments, and make sure + # that access violations are trapped and raise an + # exception. + self.assertRaises(WindowsError, windll.kernel32.GetModuleHandleA, 32) + + def test_noargs(self): + # This is a special case on win32 x64 + windll.user32.GetDesktopWindow() + +@unittest.skipUnless(sys.platform == "win32", 'Windows-specific test') +class TestWintypes(unittest.TestCase): + def test_HWND(self): + from ctypes import wintypes + self.assertEqual(sizeof(wintypes.HWND), sizeof(c_void_p)) + + def test_PARAM(self): + from ctypes import wintypes + self.assertEqual(sizeof(wintypes.WPARAM), + sizeof(c_void_p)) + self.assertEqual(sizeof(wintypes.LPARAM), + sizeof(c_void_p)) + + def test_COMError(self): + from _ctypes import COMError + if support.HAVE_DOCSTRINGS: + self.assertEqual(COMError.__doc__, + "Raised when a COM method call failed.") + + ex = COMError(-1, "text", ("details",)) + self.assertEqual(ex.hresult, -1) + self.assertEqual(ex.text, "text") + self.assertEqual(ex.details, ("details",)) + +class Structures(unittest.TestCase): + def test_struct_by_value(self): + class POINT(Structure): + _fields_ = [("x", c_long), + ("y", c_long)] + + class RECT(Structure): + _fields_ = [("left", c_long), + ("top", c_long), + ("right", c_long), + ("bottom", c_long)] + + dll = CDLL(_ctypes_test.__file__) + + pt = POINT(15, 25) + left = c_long.in_dll(dll, 'left') + top = c_long.in_dll(dll, 'top') + right = c_long.in_dll(dll, 'right') + bottom = c_long.in_dll(dll, 'bottom') + rect = RECT(left, top, right, bottom) + PointInRect = dll.PointInRect + PointInRect.argtypes = [POINTER(RECT), POINT] + self.assertEqual(1, PointInRect(byref(rect), pt)) + + ReturnRect = dll.ReturnRect + ReturnRect.argtypes = [c_int, RECT, POINTER(RECT), POINT, RECT, + POINTER(RECT), POINT, RECT] + ReturnRect.restype = RECT + for i in range(4): + ret = ReturnRect(i, rect, pointer(rect), pt, rect, + byref(rect), pt, rect) + # the c function will check and modify ret if something is + # passed in improperly + self.assertEqual(ret.left, left.value) + self.assertEqual(ret.right, right.value) + self.assertEqual(ret.top, top.value) + self.assertEqual(ret.bottom, bottom.value) + + # to not leak references, we must clean _pointer_type_cache + from ctypes import _pointer_type_cache + del _pointer_type_cache[RECT] + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/stdlib/ctypes/test/test_wintypes.py b/third_party/stdlib/ctypes/test/test_wintypes.py new file mode 100644 index 00000000..71442df8 --- /dev/null +++ b/third_party/stdlib/ctypes/test/test_wintypes.py @@ -0,0 +1,41 @@ +import sys +import unittest + +from ctypes import * + +@unittest.skipUnless(sys.platform.startswith('win'), 'Windows-only test') +class WinTypesTest(unittest.TestCase): + def test_variant_bool(self): + from ctypes import wintypes + # reads 16-bits from memory, anything non-zero is True + for true_value in (1, 32767, 32768, 65535, 65537): + true = POINTER(c_int16)(c_int16(true_value)) + value = cast(true, POINTER(wintypes.VARIANT_BOOL)) + self.assertEqual(repr(value.contents), 'VARIANT_BOOL(True)') + + vb = wintypes.VARIANT_BOOL() + self.assertIs(vb.value, False) + vb.value = True + self.assertIs(vb.value, True) + vb.value = true_value + self.assertIs(vb.value, True) + + for false_value in (0, 65536, 262144, 2**33): + false = POINTER(c_int16)(c_int16(false_value)) + value = cast(false, POINTER(wintypes.VARIANT_BOOL)) + self.assertEqual(repr(value.contents), 'VARIANT_BOOL(False)') + + # allow any bool conversion on assignment to value + for set_value in (65536, 262144, 2**33): + vb = wintypes.VARIANT_BOOL() + vb.value = set_value + self.assertIs(vb.value, True) + + vb = wintypes.VARIANT_BOOL() + vb.value = [2, 3] + self.assertIs(vb.value, True) + vb.value = [] + self.assertIs(vb.value, False) + +if __name__ == "__main__": + unittest.main() diff --git a/third_party/stdlib/ctypes/util.py b/third_party/stdlib/ctypes/util.py new file mode 100644 index 00000000..b2c514d5 --- /dev/null +++ b/third_party/stdlib/ctypes/util.py @@ -0,0 +1,271 @@ +import sys, os + +# find_library(name) returns the pathname of a library, or None. +if os.name == "nt": + + def _get_build_version(): + """Return the version of MSVC that was used to build Python. + + For Python 2.3 and up, the version number is included in + sys.version. For earlier versions, assume the compiler is MSVC 6. + """ + # This function was copied from Lib/distutils/msvccompiler.py + prefix = "MSC v." + i = sys.version.find(prefix) + if i == -1: + return 6 + i = i + len(prefix) + s, rest = sys.version[i:].split(" ", 1) + majorVersion = int(s[:-2]) - 6 + minorVersion = int(s[2:3]) / 10.0 + # I don't think paths are affected by minor version in version 6 + if majorVersion == 6: + minorVersion = 0 + if majorVersion >= 6: + return majorVersion + minorVersion + # else we don't know what version of the compiler this is + return None + + def find_msvcrt(): + """Return the name of the VC runtime dll""" + version = _get_build_version() + if version is None: + # better be safe than sorry + return None + if version <= 6: + clibname = 'msvcrt' + else: + clibname = 'msvcr%d' % (version * 10) + + # If python was built with in debug mode + import imp + if imp.get_suffixes()[0][0] == '_d.pyd': + clibname += 'd' + return clibname+'.dll' + + def find_library(name): + if name in ('c', 'm'): + return find_msvcrt() + # See MSDN for the REAL search order. + for directory in os.environ['PATH'].split(os.pathsep): + fname = os.path.join(directory, name) + if os.path.isfile(fname): + return fname + if fname.lower().endswith(".dll"): + continue + fname = fname + ".dll" + if os.path.isfile(fname): + return fname + return None + +if os.name == "ce": + # search path according to MSDN: + # - absolute path specified by filename + # - The .exe launch directory + # - the Windows directory + # - ROM dll files (where are they?) + # - OEM specified search path: HKLM\Loader\SystemPath + def find_library(name): + return name + +if os.name == "posix" and sys.platform == "darwin": + from ctypes.macholib.dyld import dyld_find as _dyld_find + def find_library(name): + possible = ['lib%s.dylib' % name, + '%s.dylib' % name, + '%s.framework/%s' % (name, name)] + for name in possible: + try: + return _dyld_find(name) + except ValueError: + continue + return None + +elif os.name == "posix": + # Andreas Degert's find functions, using gcc, /sbin/ldconfig, objdump + import re, tempfile, errno + + def _findLib_gcc(name): + expr = r'[^\(\)\s]*lib%s\.[^\(\)\s]*' % re.escape(name) + fdout, ccout = tempfile.mkstemp() + os.close(fdout) + cmd = 'if type gcc >/dev/null 2>&1; then CC=gcc; elif type cc >/dev/null 2>&1; then CC=cc;else exit 10; fi;' \ + 'LANG=C LC_ALL=C $CC -Wl,-t -o ' + ccout + ' 2>&1 -l' + name + try: + f = os.popen(cmd) + try: + trace = f.read() + finally: + rv = f.close() + finally: + try: + os.unlink(ccout) + except OSError, e: + if e.errno != errno.ENOENT: + raise + if rv == 10: + raise OSError, 'gcc or cc command not found' + res = re.search(expr, trace) + if not res: + return None + return res.group(0) + + + if sys.platform == "sunos5": + # use /usr/ccs/bin/dump on solaris + def _get_soname(f): + if not f: + return None + cmd = "/usr/ccs/bin/dump -Lpv 2>/dev/null " + f + f = os.popen(cmd) + try: + data = f.read() + finally: + f.close() + res = re.search(r'\[.*\]\sSONAME\s+([^\s]+)', data) + if not res: + return None + return res.group(1) + else: + def _get_soname(f): + # assuming GNU binutils / ELF + if not f: + return None + cmd = 'if ! type objdump >/dev/null 2>&1; then exit 10; fi;' \ + "objdump -p -j .dynamic 2>/dev/null " + f + f = os.popen(cmd) + try: + dump = f.read() + finally: + rv = f.close() + if rv == 10: + raise OSError, 'objdump command not found' + res = re.search(r'\sSONAME\s+([^\s]+)', dump) + if not res: + return None + return res.group(1) + + if (sys.platform.startswith("freebsd") + or sys.platform.startswith("openbsd") + or sys.platform.startswith("dragonfly")): + + def _num_version(libname): + # "libxyz.so.MAJOR.MINOR" => [ MAJOR, MINOR ] + parts = libname.split(".") + nums = [] + try: + while parts: + nums.insert(0, int(parts.pop())) + except ValueError: + pass + return nums or [ sys.maxint ] + + def find_library(name): + ename = re.escape(name) + expr = r':-l%s\.\S+ => \S*/(lib%s\.\S+)' % (ename, ename) + f = os.popen('/sbin/ldconfig -r 2>/dev/null') + try: + data = f.read() + finally: + f.close() + res = re.findall(expr, data) + if not res: + return _get_soname(_findLib_gcc(name)) + res.sort(key=_num_version) + return res[-1] + + elif sys.platform == "sunos5": + + def _findLib_crle(name, is64): + if not os.path.exists('/usr/bin/crle'): + return None + + if is64: + cmd = 'env LC_ALL=C /usr/bin/crle -64 2>/dev/null' + else: + cmd = 'env LC_ALL=C /usr/bin/crle 2>/dev/null' + + paths = None + for line in os.popen(cmd).readlines(): + line = line.strip() + if line.startswith('Default Library Path (ELF):'): + paths = line.split()[4] + + if not paths: + return None + + for dir in paths.split(":"): + libfile = os.path.join(dir, "lib%s.so" % name) + if os.path.exists(libfile): + return libfile + + return None + + def find_library(name, is64 = False): + return _get_soname(_findLib_crle(name, is64) or _findLib_gcc(name)) + + else: + + def _findSoname_ldconfig(name): + import struct + if struct.calcsize('l') == 4: + machine = os.uname()[4] + '-32' + else: + machine = os.uname()[4] + '-64' + mach_map = { + 'x86_64-64': 'libc6,x86-64', + 'ppc64-64': 'libc6,64bit', + 'sparc64-64': 'libc6,64bit', + 's390x-64': 'libc6,64bit', + 'ia64-64': 'libc6,IA-64', + } + abi_type = mach_map.get(machine, 'libc6') + + # XXX assuming GLIBC's ldconfig (with option -p) + expr = r'\s+(lib%s\.[^\s]+)\s+\(%s' % (re.escape(name), abi_type) + f = os.popen('LC_ALL=C LANG=C /sbin/ldconfig -p 2>/dev/null') + try: + data = f.read() + finally: + f.close() + res = re.search(expr, data) + if not res: + return None + return res.group(1) + + def find_library(name): + return _findSoname_ldconfig(name) or _get_soname(_findLib_gcc(name)) + +################################################################ +# test code + +def test(): + from ctypes import cdll + if os.name == "nt": + print cdll.msvcrt + print cdll.load("msvcrt") + print find_library("msvcrt") + + if os.name == "posix": + # find and load_version + print find_library("m") + print find_library("c") + print find_library("bz2") + + # getattr +## print cdll.m +## print cdll.bz2 + + # load + if sys.platform == "darwin": + print cdll.LoadLibrary("libm.dylib") + print cdll.LoadLibrary("libcrypto.dylib") + print cdll.LoadLibrary("libSystem.dylib") + print cdll.LoadLibrary("System.framework/System") + else: + print cdll.LoadLibrary("libm.so") + print cdll.LoadLibrary("libcrypt.so") + print find_library("crypt") + +if __name__ == "__main__": + test() diff --git a/third_party/stdlib/ctypes/wintypes.py b/third_party/stdlib/ctypes/wintypes.py new file mode 100644 index 00000000..e7f569c9 --- /dev/null +++ b/third_party/stdlib/ctypes/wintypes.py @@ -0,0 +1,181 @@ +# The most useful windows datatypes +from ctypes import * + +BYTE = c_byte +WORD = c_ushort +DWORD = c_ulong + +WCHAR = c_wchar +UINT = c_uint +INT = c_int + +DOUBLE = c_double +FLOAT = c_float + +BOOLEAN = BYTE +BOOL = c_long + +from ctypes import _SimpleCData +class VARIANT_BOOL(_SimpleCData): + _type_ = "v" + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self.value) + +ULONG = c_ulong +LONG = c_long + +USHORT = c_ushort +SHORT = c_short + +# in the windows header files, these are structures. +_LARGE_INTEGER = LARGE_INTEGER = c_longlong +_ULARGE_INTEGER = ULARGE_INTEGER = c_ulonglong + +LPCOLESTR = LPOLESTR = OLESTR = c_wchar_p +LPCWSTR = LPWSTR = c_wchar_p +LPCSTR = LPSTR = c_char_p +LPCVOID = LPVOID = c_void_p + +# WPARAM is defined as UINT_PTR (unsigned type) +# LPARAM is defined as LONG_PTR (signed type) +if sizeof(c_long) == sizeof(c_void_p): + WPARAM = c_ulong + LPARAM = c_long +elif sizeof(c_longlong) == sizeof(c_void_p): + WPARAM = c_ulonglong + LPARAM = c_longlong + +ATOM = WORD +LANGID = WORD + +COLORREF = DWORD +LGRPID = DWORD +LCTYPE = DWORD + +LCID = DWORD + +################################################################ +# HANDLE types +HANDLE = c_void_p # in the header files: void * + +HACCEL = HANDLE +HBITMAP = HANDLE +HBRUSH = HANDLE +HCOLORSPACE = HANDLE +HDC = HANDLE +HDESK = HANDLE +HDWP = HANDLE +HENHMETAFILE = HANDLE +HFONT = HANDLE +HGDIOBJ = HANDLE +HGLOBAL = HANDLE +HHOOK = HANDLE +HICON = HANDLE +HINSTANCE = HANDLE +HKEY = HANDLE +HKL = HANDLE +HLOCAL = HANDLE +HMENU = HANDLE +HMETAFILE = HANDLE +HMODULE = HANDLE +HMONITOR = HANDLE +HPALETTE = HANDLE +HPEN = HANDLE +HRGN = HANDLE +HRSRC = HANDLE +HSTR = HANDLE +HTASK = HANDLE +HWINSTA = HANDLE +HWND = HANDLE +SC_HANDLE = HANDLE +SERVICE_STATUS_HANDLE = HANDLE + +################################################################ +# Some important structure definitions + +class RECT(Structure): + _fields_ = [("left", c_long), + ("top", c_long), + ("right", c_long), + ("bottom", c_long)] +tagRECT = _RECTL = RECTL = RECT + +class _SMALL_RECT(Structure): + _fields_ = [('Left', c_short), + ('Top', c_short), + ('Right', c_short), + ('Bottom', c_short)] +SMALL_RECT = _SMALL_RECT + +class _COORD(Structure): + _fields_ = [('X', c_short), + ('Y', c_short)] + +class POINT(Structure): + _fields_ = [("x", c_long), + ("y", c_long)] +tagPOINT = _POINTL = POINTL = POINT + +class SIZE(Structure): + _fields_ = [("cx", c_long), + ("cy", c_long)] +tagSIZE = SIZEL = SIZE + +def RGB(red, green, blue): + return red + (green << 8) + (blue << 16) + +class FILETIME(Structure): + _fields_ = [("dwLowDateTime", DWORD), + ("dwHighDateTime", DWORD)] +_FILETIME = FILETIME + +class MSG(Structure): + _fields_ = [("hWnd", HWND), + ("message", c_uint), + ("wParam", WPARAM), + ("lParam", LPARAM), + ("time", DWORD), + ("pt", POINT)] +tagMSG = MSG +MAX_PATH = 260 + +class WIN32_FIND_DATAA(Structure): + _fields_ = [("dwFileAttributes", DWORD), + ("ftCreationTime", FILETIME), + ("ftLastAccessTime", FILETIME), + ("ftLastWriteTime", FILETIME), + ("nFileSizeHigh", DWORD), + ("nFileSizeLow", DWORD), + ("dwReserved0", DWORD), + ("dwReserved1", DWORD), + ("cFileName", c_char * MAX_PATH), + ("cAlternateFileName", c_char * 14)] + +class WIN32_FIND_DATAW(Structure): + _fields_ = [("dwFileAttributes", DWORD), + ("ftCreationTime", FILETIME), + ("ftLastAccessTime", FILETIME), + ("ftLastWriteTime", FILETIME), + ("nFileSizeHigh", DWORD), + ("nFileSizeLow", DWORD), + ("dwReserved0", DWORD), + ("dwReserved1", DWORD), + ("cFileName", c_wchar * MAX_PATH), + ("cAlternateFileName", c_wchar * 14)] + +__all__ = ['ATOM', 'BOOL', 'BOOLEAN', 'BYTE', 'COLORREF', 'DOUBLE', 'DWORD', + 'FILETIME', 'FLOAT', 'HACCEL', 'HANDLE', 'HBITMAP', 'HBRUSH', + 'HCOLORSPACE', 'HDC', 'HDESK', 'HDWP', 'HENHMETAFILE', 'HFONT', + 'HGDIOBJ', 'HGLOBAL', 'HHOOK', 'HICON', 'HINSTANCE', 'HKEY', + 'HKL', 'HLOCAL', 'HMENU', 'HMETAFILE', 'HMODULE', 'HMONITOR', + 'HPALETTE', 'HPEN', 'HRGN', 'HRSRC', 'HSTR', 'HTASK', 'HWINSTA', + 'HWND', 'INT', 'LANGID', 'LARGE_INTEGER', 'LCID', 'LCTYPE', + 'LGRPID', 'LONG', 'LPARAM', 'LPCOLESTR', 'LPCSTR', 'LPCVOID', + 'LPCWSTR', 'LPOLESTR', 'LPSTR', 'LPVOID', 'LPWSTR', 'MAX_PATH', + 'MSG', 'OLESTR', 'POINT', 'POINTL', 'RECT', 'RECTL', 'RGB', + 'SC_HANDLE', 'SERVICE_STATUS_HANDLE', 'SHORT', 'SIZE', 'SIZEL', + 'SMALL_RECT', 'UINT', 'ULARGE_INTEGER', 'ULONG', 'USHORT', + 'VARIANT_BOOL', 'WCHAR', 'WIN32_FIND_DATAA', 'WIN32_FIND_DATAW', + 'WORD', 'WPARAM', '_COORD', '_FILETIME', '_LARGE_INTEGER', + '_POINTL', '_RECTL', '_SMALL_RECT', '_ULARGE_INTEGER', 'tagMSG', + 'tagPOINT', 'tagRECT', 'tagSIZE'] diff --git a/third_party/stdlib/email/__init__.py b/third_party/stdlib/email/__init__.py new file mode 100644 index 00000000..a780ebe3 --- /dev/null +++ b/third_party/stdlib/email/__init__.py @@ -0,0 +1,123 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""A package for parsing, handling, and generating email messages.""" + +__version__ = '4.0.3' + +__all__ = [ + # Old names + 'base64MIME', + 'Charset', + 'Encoders', + 'Errors', + 'Generator', + 'Header', + 'Iterators', + 'Message', + 'MIMEAudio', + 'MIMEBase', + 'MIMEImage', + 'MIMEMessage', + 'MIMEMultipart', + 'MIMENonMultipart', + 'MIMEText', + 'Parser', + 'quopriMIME', + 'Utils', + 'message_from_string', + 'message_from_file', + # new names + 'base64mime', + 'charset', + 'encoders', + 'errors', + 'generator', + 'header', + 'iterators', + 'message', + 'mime', + 'parser', + 'quoprimime', + 'utils', + ] + + + +# Some convenience routines. Don't import Parser and Message as side-effects +# of importing email since those cascadingly import most of the rest of the +# email package. +def message_from_string(s, *args, **kws): + """Parse a string into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + from email.parser import Parser + return Parser(*args, **kws).parsestr(s) + + +def message_from_file(fp, *args, **kws): + """Read a file and parse its contents into a Message object model. + + Optional _class and strict are passed to the Parser constructor. + """ + from email.parser import Parser + return Parser(*args, **kws).parse(fp) + + + +# Lazy loading to provide name mapping from new-style names (PEP 8 compatible +# email 4.0 module names), to old-style names (email 3.0 module names). +import sys + +class LazyImporter(object): + def __init__(self, module_name): + self.__name__ = 'email.' + module_name + + def __getattr__(self, name): + __import__(self.__name__) + mod = sys.modules[self.__name__] + self.__dict__.update(mod.__dict__) + return getattr(mod, name) + + +_LOWERNAMES = [ + # email. -> email. + 'Charset', + 'Encoders', + 'Errors', + 'FeedParser', + 'Generator', + 'Header', + 'Iterators', + 'Message', + 'Parser', + 'Utils', + 'base64MIME', + 'quopriMIME', + ] + +_MIMENAMES = [ + # email.MIME -> email.mime. + 'Audio', + 'Base', + 'Image', + 'Message', + 'Multipart', + 'NonMultipart', + 'Text', + ] + +for _name in _LOWERNAMES: + importer = LazyImporter(_name.lower()) + sys.modules['email.' + _name] = importer + setattr(sys.modules['email'], _name, importer) + + +import email.mime +for _name in _MIMENAMES: + importer = LazyImporter('mime.' + _name.lower()) + sys.modules['email.MIME' + _name] = importer + setattr(sys.modules['email'], 'MIME' + _name, importer) + setattr(sys.modules['email.mime'], _name, importer) diff --git a/third_party/stdlib/email/_parseaddr.py b/third_party/stdlib/email/_parseaddr.py new file mode 100644 index 00000000..690db2c2 --- /dev/null +++ b/third_party/stdlib/email/_parseaddr.py @@ -0,0 +1,497 @@ +# Copyright (C) 2002-2007 Python Software Foundation +# Contact: email-sig@python.org + +"""Email address parsing code. + +Lifted directly from rfc822.py. This should eventually be rewritten. +""" + +__all__ = [ + 'mktime_tz', + 'parsedate', + 'parsedate_tz', + 'quote', + ] + +import time, calendar + +SPACE = ' ' +EMPTYSTRING = '' +COMMASPACE = ', ' + +# Parse a date field +_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', + 'aug', 'sep', 'oct', 'nov', 'dec', + 'january', 'february', 'march', 'april', 'may', 'june', 'july', + 'august', 'september', 'october', 'november', 'december'] + +_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] + +# The timezone table does not include the military time zones defined +# in RFC822, other than Z. According to RFC1123, the description in +# RFC822 gets the signs wrong, so we can't rely on any such time +# zones. RFC1123 recommends that numeric timezone indicators be used +# instead of timezone names. + +_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0, + 'AST': -400, 'ADT': -300, # Atlantic (used in Canada) + 'EST': -500, 'EDT': -400, # Eastern + 'CST': -600, 'CDT': -500, # Central + 'MST': -700, 'MDT': -600, # Mountain + 'PST': -800, 'PDT': -700 # Pacific + } + + +def parsedate_tz(data): + """Convert a date string to a time tuple. + + Accounts for military timezones. + """ + data = data.split() + # The FWS after the comma after the day-of-week is optional, so search and + # adjust for this. + if data[0].endswith(',') or data[0].lower() in _daynames: + # There's a dayname here. Skip it + del data[0] + else: + i = data[0].rfind(',') + if i >= 0: + data[0] = data[0][i+1:] + if len(data) == 3: # RFC 850 date, deprecated + stuff = data[0].split('-') + if len(stuff) == 3: + data = stuff + data[1:] + if len(data) == 4: + s = data[3] + i = s.find('+') + if i > 0: + data[3:] = [s[:i], s[i+1:]] + else: + data.append('') # Dummy tz + if len(data) < 5: + return None + data = data[:5] + [dd, mm, yy, tm, tz] = data + mm = mm.lower() + if mm not in _monthnames: + dd, mm = mm, dd.lower() + if mm not in _monthnames: + return None + mm = _monthnames.index(mm) + 1 + if mm > 12: + mm -= 12 + if dd[-1] == ',': + dd = dd[:-1] + i = yy.find(':') + if i > 0: + yy, tm = tm, yy + if yy[-1] == ',': + yy = yy[:-1] + if not yy[0].isdigit(): + yy, tz = tz, yy + if tm[-1] == ',': + tm = tm[:-1] + tm = tm.split(':') + if len(tm) == 2: + [thh, tmm] = tm + tss = '0' + elif len(tm) == 3: + [thh, tmm, tss] = tm + else: + return None + try: + yy = int(yy) + dd = int(dd) + thh = int(thh) + tmm = int(tmm) + tss = int(tss) + except ValueError: + return None + # Check for a yy specified in two-digit format, then convert it to the + # appropriate four-digit format, according to the POSIX standard. RFC 822 + # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) + # mandates a 4-digit yy. For more information, see the documentation for + # the time module. + if yy < 100: + # The year is between 1969 and 1999 (inclusive). + if yy > 68: + yy += 1900 + # The year is between 2000 and 2068 (inclusive). + else: + yy += 2000 + tzoffset = None + tz = tz.upper() + if tz in _timezones: + tzoffset = _timezones[tz] + else: + try: + tzoffset = int(tz) + except ValueError: + pass + # Convert a timezone offset into seconds ; -0500 -> -18000 + if tzoffset: + if tzoffset < 0: + tzsign = -1 + tzoffset = -tzoffset + else: + tzsign = 1 + tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) + # Daylight Saving Time flag is set to -1, since DST is unknown. + return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset + + +def parsedate(data): + """Convert a time string to a time tuple.""" + t = parsedate_tz(data) + if isinstance(t, tuple): + return t[:9] + else: + return t + + +def mktime_tz(data): + """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.""" + if data[9] is None: + # No zone info, so localtime is better assumption than GMT + return time.mktime(data[:8] + (-1,)) + else: + t = calendar.timegm(data) + return t - data[9] + + +def quote(str): + """Prepare string to be used in a quoted string. + + Turns backslash and double quote characters into quoted pairs. These + are the only characters that need to be quoted inside a quoted string. + Does not add the surrounding double quotes. + """ + return str.replace('\\', '\\\\').replace('"', '\\"') + + +class AddrlistClass: + """Address parser class by Ben Escoto. + + To understand what this class does, it helps to have a copy of RFC 2822 in + front of you. + + Note: this class interface is deprecated and may be removed in the future. + Use rfc822.AddressList instead. + """ + + def __init__(self, field): + """Initialize a new instance. + + `field' is an unparsed address header field, containing + one or more addresses. + """ + self.specials = '()<>@,:;.\"[]' + self.pos = 0 + self.LWS = ' \t' + self.CR = '\r\n' + self.FWS = self.LWS + self.CR + self.atomends = self.specials + self.LWS + self.CR + # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it + # is obsolete syntax. RFC 2822 requires that we recognize obsolete + # syntax, so allow dots in phrases. + self.phraseends = self.atomends.replace('.', '') + self.field = field + self.commentlist = [] + + def gotonext(self): + """Parse up to the start of the next address.""" + while self.pos < len(self.field): + if self.field[self.pos] in self.LWS + '\n\r': + self.pos += 1 + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + else: + break + + def getaddrlist(self): + """Parse all addresses. + + Returns a list containing all of the addresses. + """ + result = [] + while self.pos < len(self.field): + ad = self.getaddress() + if ad: + result += ad + else: + result.append(('', '')) + return result + + def getaddress(self): + """Parse the next address.""" + self.commentlist = [] + self.gotonext() + + oldpos = self.pos + oldcl = self.commentlist + plist = self.getphraselist() + + self.gotonext() + returnlist = [] + + if self.pos >= len(self.field): + # Bad email address technically, no domain. + if plist: + returnlist = [(SPACE.join(self.commentlist), plist[0])] + + elif self.field[self.pos] in '.@': + # email address is just an addrspec + # this isn't very efficient since we start over + self.pos = oldpos + self.commentlist = oldcl + addrspec = self.getaddrspec() + returnlist = [(SPACE.join(self.commentlist), addrspec)] + + elif self.field[self.pos] == ':': + # address is a group + returnlist = [] + + fieldlen = len(self.field) + self.pos += 1 + while self.pos < len(self.field): + self.gotonext() + if self.pos < fieldlen and self.field[self.pos] == ';': + self.pos += 1 + break + returnlist = returnlist + self.getaddress() + + elif self.field[self.pos] == '<': + # Address is a phrase then a route addr + routeaddr = self.getrouteaddr() + + if self.commentlist: + returnlist = [(SPACE.join(plist) + ' (' + + ' '.join(self.commentlist) + ')', routeaddr)] + else: + returnlist = [(SPACE.join(plist), routeaddr)] + + else: + if plist: + returnlist = [(SPACE.join(self.commentlist), plist[0])] + elif self.field[self.pos] in self.specials: + self.pos += 1 + + self.gotonext() + if self.pos < len(self.field) and self.field[self.pos] == ',': + self.pos += 1 + return returnlist + + def getrouteaddr(self): + """Parse a route address (Return-path value). + + This method just skips all the route stuff and returns the addrspec. + """ + if self.field[self.pos] != '<': + return + + expectroute = False + self.pos += 1 + self.gotonext() + adlist = '' + while self.pos < len(self.field): + if expectroute: + self.getdomain() + expectroute = False + elif self.field[self.pos] == '>': + self.pos += 1 + break + elif self.field[self.pos] == '@': + self.pos += 1 + expectroute = True + elif self.field[self.pos] == ':': + self.pos += 1 + else: + adlist = self.getaddrspec() + self.pos += 1 + break + self.gotonext() + + return adlist + + def getaddrspec(self): + """Parse an RFC 2822 addr-spec.""" + aslist = [] + + self.gotonext() + while self.pos < len(self.field): + if self.field[self.pos] == '.': + aslist.append('.') + self.pos += 1 + elif self.field[self.pos] == '"': + aslist.append('"%s"' % quote(self.getquote())) + elif self.field[self.pos] in self.atomends: + break + else: + aslist.append(self.getatom()) + self.gotonext() + + if self.pos >= len(self.field) or self.field[self.pos] != '@': + return EMPTYSTRING.join(aslist) + + aslist.append('@') + self.pos += 1 + self.gotonext() + return EMPTYSTRING.join(aslist) + self.getdomain() + + def getdomain(self): + """Get the complete domain name from an address.""" + sdlist = [] + while self.pos < len(self.field): + if self.field[self.pos] in self.LWS: + self.pos += 1 + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + elif self.field[self.pos] == '[': + sdlist.append(self.getdomainliteral()) + elif self.field[self.pos] == '.': + self.pos += 1 + sdlist.append('.') + elif self.field[self.pos] in self.atomends: + break + else: + sdlist.append(self.getatom()) + return EMPTYSTRING.join(sdlist) + + def getdelimited(self, beginchar, endchars, allowcomments=True): + """Parse a header fragment delimited by special characters. + + `beginchar' is the start character for the fragment. + If self is not looking at an instance of `beginchar' then + getdelimited returns the empty string. + + `endchars' is a sequence of allowable end-delimiting characters. + Parsing stops when one of these is encountered. + + If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed + within the parsed fragment. + """ + if self.field[self.pos] != beginchar: + return '' + + slist = [''] + quote = False + self.pos += 1 + while self.pos < len(self.field): + if quote: + slist.append(self.field[self.pos]) + quote = False + elif self.field[self.pos] in endchars: + self.pos += 1 + break + elif allowcomments and self.field[self.pos] == '(': + slist.append(self.getcomment()) + continue # have already advanced pos from getcomment + elif self.field[self.pos] == '\\': + quote = True + else: + slist.append(self.field[self.pos]) + self.pos += 1 + + return EMPTYSTRING.join(slist) + + def getquote(self): + """Get a quote-delimited fragment from self's field.""" + return self.getdelimited('"', '"\r', False) + + def getcomment(self): + """Get a parenthesis-delimited fragment from self's field.""" + return self.getdelimited('(', ')\r', True) + + def getdomainliteral(self): + """Parse an RFC 2822 domain-literal.""" + return '[%s]' % self.getdelimited('[', ']\r', False) + + def getatom(self, atomends=None): + """Parse an RFC 2822 atom. + + Optional atomends specifies a different set of end token delimiters + (the default is to use self.atomends). This is used e.g. in + getphraselist() since phrase endings must not include the `.' (which + is legal in phrases).""" + atomlist = [''] + if atomends is None: + atomends = self.atomends + + while self.pos < len(self.field): + if self.field[self.pos] in atomends: + break + else: + atomlist.append(self.field[self.pos]) + self.pos += 1 + + return EMPTYSTRING.join(atomlist) + + def getphraselist(self): + """Parse a sequence of RFC 2822 phrases. + + A phrase is a sequence of words, which are in turn either RFC 2822 + atoms or quoted-strings. Phrases are canonicalized by squeezing all + runs of continuous whitespace into one space. + """ + plist = [] + + while self.pos < len(self.field): + if self.field[self.pos] in self.FWS: + self.pos += 1 + elif self.field[self.pos] == '"': + plist.append(self.getquote()) + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + elif self.field[self.pos] in self.phraseends: + break + else: + plist.append(self.getatom(self.phraseends)) + + return plist + +class AddressList(AddrlistClass): + """An AddressList encapsulates a list of parsed RFC 2822 addresses.""" + def __init__(self, field): + AddrlistClass.__init__(self, field) + if field: + self.addresslist = self.getaddrlist() + else: + self.addresslist = [] + + def __len__(self): + return len(self.addresslist) + + def __add__(self, other): + # Set union + newaddr = AddressList(None) + newaddr.addresslist = self.addresslist[:] + for x in other.addresslist: + if not x in self.addresslist: + newaddr.addresslist.append(x) + return newaddr + + def __iadd__(self, other): + # Set union, in-place + for x in other.addresslist: + if not x in self.addresslist: + self.addresslist.append(x) + return self + + def __sub__(self, other): + # Set difference + newaddr = AddressList(None) + for x in self.addresslist: + if not x in other.addresslist: + newaddr.addresslist.append(x) + return newaddr + + def __isub__(self, other): + # Set difference, in-place + for x in other.addresslist: + if x in self.addresslist: + self.addresslist.remove(x) + return self + + def __getitem__(self, index): + # Make indexing, slices, and 'in' work + return self.addresslist[index] diff --git a/third_party/stdlib/email/base64mime.py b/third_party/stdlib/email/base64mime.py new file mode 100644 index 00000000..4aa80002 --- /dev/null +++ b/third_party/stdlib/email/base64mime.py @@ -0,0 +1,183 @@ +# Copyright (C) 2002-2006 Python Software Foundation +# Author: Ben Gertzfield +# Contact: email-sig@python.org + +"""Base64 content transfer encoding per RFCs 2045-2047. + +This module handles the content transfer encoding method defined in RFC 2045 +to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit +characters encoding known as Base64. + +It is used in the MIME standards for email to attach images, audio, and text +using some 8-bit character sets to messages. + +This module provides an interface to encode and decode both headers and bodies +with Base64 encoding. + +RFC 2045 defines a method for including character set information in an +`encoded-word' in a header. This method is commonly used for 8-bit real names +in To:, From:, Cc:, etc. fields, as well as Subject: lines. + +This module does not do the line wrapping or end-of-line character conversion +necessary for proper internationalized headers; it only does dumb encoding and +decoding. To deal with the various line wrapping issues, use the email.header +module. +""" + +__all__ = [ + 'base64_len', + 'body_decode', + 'body_encode', + 'decode', + 'decodestring', + 'encode', + 'encodestring', + 'header_encode', + ] + + +from binascii import b2a_base64, a2b_base64 +from email.utils import fix_eols + +CRLF = '\r\n' +NL = '\n' +EMPTYSTRING = '' + +# See also Charset.py +MISC_LEN = 7 + + + +# Helpers +def base64_len(s): + """Return the length of s when it is encoded with base64.""" + groups_of_3, leftover = divmod(len(s), 3) + # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. + # Thanks, Tim! + n = groups_of_3 * 4 + if leftover: + n += 4 + return n + + + +def header_encode(header, charset='iso-8859-1', keep_eols=False, + maxlinelen=76, eol=NL): + """Encode a single header line with Base64 encoding in a given charset. + + Defined in RFC 2045, this Base64 encoding is identical to normal Base64 + encoding, except that each line must be intelligently wrapped (respecting + the Base64 encoding), and subsequent lines must start with a space. + + charset names the character set to use to encode the header. It defaults + to iso-8859-1. + + End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted + to the canonical email line separator \\r\\n unless the keep_eols + parameter is True (the default is False). + + Each line of the header will be terminated in the value of eol, which + defaults to "\\n". Set this to "\\r\\n" if you are using the result of + this function directly in email. + + The resulting string will be in the form: + + "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n + =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?=" + + with each line wrapped at, at most, maxlinelen characters (defaults to 76 + characters). + """ + # Return empty headers unchanged + if not header: + return header + + if not keep_eols: + header = fix_eols(header) + + # Base64 encode each line, in encoded chunks no greater than maxlinelen in + # length, after the RFC chrome is added in. + base64ed = [] + max_encoded = maxlinelen - len(charset) - MISC_LEN + max_unencoded = max_encoded * 3 // 4 + + for i in range(0, len(header), max_unencoded): + base64ed.append(b2a_base64(header[i:i+max_unencoded])) + + # Now add the RFC chrome to each encoded chunk + lines = [] + for line in base64ed: + # Ignore the last character of each line if it is a newline + if line.endswith(NL): + line = line[:-1] + # Add the chrome + lines.append('=?%s?b?%s?=' % (charset, line)) + # Glue the lines together and return it. BAW: should we be able to + # specify the leading whitespace in the joiner? + joiner = eol + ' ' + return joiner.join(lines) + + + +def encode(s, binary=True, maxlinelen=76, eol=NL): + """Encode a string with base64. + + Each line will be wrapped at, at most, maxlinelen characters (defaults to + 76 characters). + + If binary is False, end-of-line characters will be converted to the + canonical email end-of-line sequence \\r\\n. Otherwise they will be left + verbatim (this is the default). + + Each line of encoded text will end with eol, which defaults to "\\n". Set + this to "\\r\\n" if you will be using the result of this function directly + in an email. + """ + if not s: + return s + + if not binary: + s = fix_eols(s) + + encvec = [] + max_unencoded = maxlinelen * 3 // 4 + for i in range(0, len(s), max_unencoded): + # BAW: should encode() inherit b2a_base64()'s dubious behavior in + # adding a newline to the encoded string? + enc = b2a_base64(s[i:i + max_unencoded]) + if enc.endswith(NL) and eol != NL: + enc = enc[:-1] + eol + encvec.append(enc) + return EMPTYSTRING.join(encvec) + + +# For convenience and backwards compatibility w/ standard base64 module +body_encode = encode +encodestring = encode + + + +def decode(s, convert_eols=None): + """Decode a raw base64 string. + + If convert_eols is set to a string value, all canonical email linefeeds, + e.g. "\\r\\n", in the decoded text will be converted to the value of + convert_eols. os.linesep is a good choice for convert_eols if you are + decoding a text attachment. + + This function does not parse a full MIME header value encoded with + base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high + level email.header class for that functionality. + """ + if not s: + return s + + dec = a2b_base64(s) + if convert_eols: + return dec.replace(CRLF, convert_eols) + return dec + + +# For convenience and backwards compatibility w/ standard base64 module +body_decode = decode +decodestring = decode diff --git a/third_party/stdlib/email/charset.py b/third_party/stdlib/email/charset.py new file mode 100644 index 00000000..30a13ff2 --- /dev/null +++ b/third_party/stdlib/email/charset.py @@ -0,0 +1,397 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Ben Gertzfield, Barry Warsaw +# Contact: email-sig@python.org + +__all__ = [ + 'Charset', + 'add_alias', + 'add_charset', + 'add_codec', + ] + +import codecs +import email.base64mime +import email.quoprimime + +from email import errors +from email.encoders import encode_7or8bit + + + +# Flags for types of header encodings +QP = 1 # Quoted-Printable +BASE64 = 2 # Base64 +SHORTEST = 3 # the shorter of QP and base64, but only for headers + +# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7 +MISC_LEN = 7 + +DEFAULT_CHARSET = 'us-ascii' + + + +# Defaults +CHARSETS = { + # input header enc body enc output conv + 'iso-8859-1': (QP, QP, None), + 'iso-8859-2': (QP, QP, None), + 'iso-8859-3': (QP, QP, None), + 'iso-8859-4': (QP, QP, None), + # iso-8859-5 is Cyrillic, and not especially used + # iso-8859-6 is Arabic, also not particularly used + # iso-8859-7 is Greek, QP will not make it readable + # iso-8859-8 is Hebrew, QP will not make it readable + 'iso-8859-9': (QP, QP, None), + 'iso-8859-10': (QP, QP, None), + # iso-8859-11 is Thai, QP will not make it readable + 'iso-8859-13': (QP, QP, None), + 'iso-8859-14': (QP, QP, None), + 'iso-8859-15': (QP, QP, None), + 'iso-8859-16': (QP, QP, None), + 'windows-1252':(QP, QP, None), + 'viscii': (QP, QP, None), + 'us-ascii': (None, None, None), + 'big5': (BASE64, BASE64, None), + 'gb2312': (BASE64, BASE64, None), + 'euc-jp': (BASE64, None, 'iso-2022-jp'), + 'shift_jis': (BASE64, None, 'iso-2022-jp'), + 'iso-2022-jp': (BASE64, None, None), + 'koi8-r': (BASE64, BASE64, None), + 'utf-8': (SHORTEST, BASE64, 'utf-8'), + # We're making this one up to represent raw unencoded 8-bit + '8bit': (None, BASE64, 'utf-8'), + } + +# Aliases for other commonly-used names for character sets. Map +# them to the real ones used in email. +ALIASES = { + 'latin_1': 'iso-8859-1', + 'latin-1': 'iso-8859-1', + 'latin_2': 'iso-8859-2', + 'latin-2': 'iso-8859-2', + 'latin_3': 'iso-8859-3', + 'latin-3': 'iso-8859-3', + 'latin_4': 'iso-8859-4', + 'latin-4': 'iso-8859-4', + 'latin_5': 'iso-8859-9', + 'latin-5': 'iso-8859-9', + 'latin_6': 'iso-8859-10', + 'latin-6': 'iso-8859-10', + 'latin_7': 'iso-8859-13', + 'latin-7': 'iso-8859-13', + 'latin_8': 'iso-8859-14', + 'latin-8': 'iso-8859-14', + 'latin_9': 'iso-8859-15', + 'latin-9': 'iso-8859-15', + 'latin_10':'iso-8859-16', + 'latin-10':'iso-8859-16', + 'cp949': 'ks_c_5601-1987', + 'euc_jp': 'euc-jp', + 'euc_kr': 'euc-kr', + 'ascii': 'us-ascii', + } + + +# Map charsets to their Unicode codec strings. +CODEC_MAP = { + 'gb2312': 'eucgb2312_cn', + 'big5': 'big5_tw', + # Hack: We don't want *any* conversion for stuff marked us-ascii, as all + # sorts of garbage might be sent to us in the guise of 7-bit us-ascii. + # Let that stuff pass through without conversion to/from Unicode. + 'us-ascii': None, + } + + + +# Convenience functions for extending the above mappings +def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): + """Add character set properties to the global registry. + + charset is the input character set, and must be the canonical name of a + character set. + + Optional header_enc and body_enc is either Charset.QP for + quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for + the shortest of qp or base64 encoding, or None for no encoding. SHORTEST + is only valid for header_enc. It describes how message headers and + message bodies in the input charset are to be encoded. Default is no + encoding. + + Optional output_charset is the character set that the output should be + in. Conversions will proceed from input charset, to Unicode, to the + output charset when the method Charset.convert() is called. The default + is to output in the same character set as the input. + + Both input_charset and output_charset must have Unicode codec entries in + the module's charset-to-codec mapping; use add_codec(charset, codecname) + to add codecs the module does not know about. See the codecs module's + documentation for more information. + """ + if body_enc == SHORTEST: + raise ValueError('SHORTEST not allowed for body_enc') + CHARSETS[charset] = (header_enc, body_enc, output_charset) + + +def add_alias(alias, canonical): + """Add a character set alias. + + alias is the alias name, e.g. latin-1 + canonical is the character set's canonical name, e.g. iso-8859-1 + """ + ALIASES[alias] = canonical + + +def add_codec(charset, codecname): + """Add a codec that map characters in the given charset to/from Unicode. + + charset is the canonical name of a character set. codecname is the name + of a Python codec, as appropriate for the second argument to the unicode() + built-in, or to the encode() method of a Unicode string. + """ + CODEC_MAP[charset] = codecname + + + +class Charset: + """Map character sets to their email properties. + + This class provides information about the requirements imposed on email + for a specific character set. It also provides convenience routines for + converting between character sets, given the availability of the + applicable codecs. Given a character set, it will do its best to provide + information on how to use that character set in an email in an + RFC-compliant way. + + Certain character sets must be encoded with quoted-printable or base64 + when used in email headers or bodies. Certain character sets must be + converted outright, and are not allowed in email. Instances of this + module expose the following information about a character set: + + input_charset: The initial character set specified. Common aliases + are converted to their `official' email names (e.g. latin_1 + is converted to iso-8859-1). Defaults to 7-bit us-ascii. + + header_encoding: If the character set must be encoded before it can be + used in an email header, this attribute will be set to + Charset.QP (for quoted-printable), Charset.BASE64 (for + base64 encoding), or Charset.SHORTEST for the shortest of + QP or BASE64 encoding. Otherwise, it will be None. + + body_encoding: Same as header_encoding, but describes the encoding for the + mail message's body, which indeed may be different than the + header encoding. Charset.SHORTEST is not allowed for + body_encoding. + + output_charset: Some character sets must be converted before they can be + used in email headers or bodies. If the input_charset is + one of them, this attribute will contain the name of the + charset output will be converted to. Otherwise, it will + be None. + + input_codec: The name of the Python codec used to convert the + input_charset to Unicode. If no conversion codec is + necessary, this attribute will be None. + + output_codec: The name of the Python codec used to convert Unicode + to the output_charset. If no conversion codec is necessary, + this attribute will have the same value as the input_codec. + """ + def __init__(self, input_charset=DEFAULT_CHARSET): + # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to + # unicode because its .lower() is locale insensitive. If the argument + # is already a unicode, we leave it at that, but ensure that the + # charset is ASCII, as the standard (RFC XXX) requires. + try: + if isinstance(input_charset, unicode): + input_charset.encode('ascii') + else: + input_charset = unicode(input_charset, 'ascii') + except UnicodeError: + raise errors.CharsetError(input_charset) + input_charset = input_charset.lower().encode('ascii') + # Set the input charset after filtering through the aliases and/or codecs + if not (input_charset in ALIASES or input_charset in CHARSETS): + try: + input_charset = codecs.lookup(input_charset).name + except LookupError: + pass + self.input_charset = ALIASES.get(input_charset, input_charset) + # We can try to guess which encoding and conversion to use by the + # charset_map dictionary. Try that first, but let the user override + # it. + henc, benc, conv = CHARSETS.get(self.input_charset, + (SHORTEST, BASE64, None)) + if not conv: + conv = self.input_charset + # Set the attributes, allowing the arguments to override the default. + self.header_encoding = henc + self.body_encoding = benc + self.output_charset = ALIASES.get(conv, conv) + # Now set the codecs. If one isn't defined for input_charset, + # guess and try a Unicode codec with the same name as input_codec. + self.input_codec = CODEC_MAP.get(self.input_charset, + self.input_charset) + self.output_codec = CODEC_MAP.get(self.output_charset, + self.output_charset) + + def __str__(self): + return self.input_charset.lower() + + __repr__ = __str__ + + def __eq__(self, other): + return str(self) == str(other).lower() + + def __ne__(self, other): + return not self.__eq__(other) + + def get_body_encoding(self): + """Return the content-transfer-encoding used for body encoding. + + This is either the string `quoted-printable' or `base64' depending on + the encoding used, or it is a function in which case you should call + the function with a single argument, the Message object being + encoded. The function should then set the Content-Transfer-Encoding + header itself to whatever is appropriate. + + Returns "quoted-printable" if self.body_encoding is QP. + Returns "base64" if self.body_encoding is BASE64. + Returns "7bit" otherwise. + """ + assert self.body_encoding != SHORTEST + if self.body_encoding == QP: + return 'quoted-printable' + elif self.body_encoding == BASE64: + return 'base64' + else: + return encode_7or8bit + + def convert(self, s): + """Convert a string from the input_codec to the output_codec.""" + if self.input_codec != self.output_codec: + return unicode(s, self.input_codec).encode(self.output_codec) + else: + return s + + def to_splittable(self, s): + """Convert a possibly multibyte string to a safely splittable format. + + Uses the input_codec to try and convert the string to Unicode, so it + can be safely split on character boundaries (even for multibyte + characters). + + Returns the string as-is if it isn't known how to convert it to + Unicode with the input_charset. + + Characters that could not be converted to Unicode will be replaced + with the Unicode replacement character U+FFFD. + """ + if isinstance(s, unicode) or self.input_codec is None: + return s + try: + return unicode(s, self.input_codec, 'replace') + except LookupError: + # Input codec not installed on system, so return the original + # string unchanged. + return s + + def from_splittable(self, ustr, to_output=True): + """Convert a splittable string back into an encoded string. + + Uses the proper codec to try and convert the string from Unicode back + into an encoded format. Return the string as-is if it is not Unicode, + or if it could not be converted from Unicode. + + Characters that could not be converted from Unicode will be replaced + with an appropriate character (usually '?'). + + If to_output is True (the default), uses output_codec to convert to an + encoded format. If to_output is False, uses input_codec. + """ + if to_output: + codec = self.output_codec + else: + codec = self.input_codec + if not isinstance(ustr, unicode) or codec is None: + return ustr + try: + return ustr.encode(codec, 'replace') + except LookupError: + # Output codec not installed + return ustr + + def get_output_charset(self): + """Return the output character set. + + This is self.output_charset if that is not None, otherwise it is + self.input_charset. + """ + return self.output_charset or self.input_charset + + def encoded_header_len(self, s): + """Return the length of the encoded header string.""" + cset = self.get_output_charset() + # The len(s) of a 7bit encoding is len(s) + if self.header_encoding == BASE64: + return email.base64mime.base64_len(s) + len(cset) + MISC_LEN + elif self.header_encoding == QP: + return email.quoprimime.header_quopri_len(s) + len(cset) + MISC_LEN + elif self.header_encoding == SHORTEST: + lenb64 = email.base64mime.base64_len(s) + lenqp = email.quoprimime.header_quopri_len(s) + return min(lenb64, lenqp) + len(cset) + MISC_LEN + else: + return len(s) + + def header_encode(self, s, convert=False): + """Header-encode a string, optionally converting it to output_charset. + + If convert is True, the string will be converted from the input + charset to the output charset automatically. This is not useful for + multibyte character sets, which have line length issues (multibyte + characters must be split on a character, not a byte boundary); use the + high-level Header class to deal with these issues. convert defaults + to False. + + The type of encoding (base64 or quoted-printable) will be based on + self.header_encoding. + """ + cset = self.get_output_charset() + if convert: + s = self.convert(s) + # 7bit/8bit encodings return the string unchanged (modulo conversions) + if self.header_encoding == BASE64: + return email.base64mime.header_encode(s, cset) + elif self.header_encoding == QP: + return email.quoprimime.header_encode(s, cset, maxlinelen=None) + elif self.header_encoding == SHORTEST: + lenb64 = email.base64mime.base64_len(s) + lenqp = email.quoprimime.header_quopri_len(s) + if lenb64 < lenqp: + return email.base64mime.header_encode(s, cset) + else: + return email.quoprimime.header_encode(s, cset, maxlinelen=None) + else: + return s + + def body_encode(self, s, convert=True): + """Body-encode a string and convert it to output_charset. + + If convert is True (the default), the string will be converted from + the input charset to output charset automatically. Unlike + header_encode(), there are no issues with byte boundaries and + multibyte charsets in email bodies, so this is usually pretty safe. + + The type of encoding (base64 or quoted-printable) will be based on + self.body_encoding. + """ + if convert: + s = self.convert(s) + # 7bit/8bit encodings return the string unchanged (module conversions) + if self.body_encoding is BASE64: + return email.base64mime.body_encode(s) + elif self.body_encoding is QP: + return email.quoprimime.body_encode(s) + else: + return s diff --git a/third_party/stdlib/email/encoders.py b/third_party/stdlib/email/encoders.py new file mode 100644 index 00000000..af45e62c --- /dev/null +++ b/third_party/stdlib/email/encoders.py @@ -0,0 +1,82 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Encodings and related functions.""" + +__all__ = [ + 'encode_7or8bit', + 'encode_base64', + 'encode_noop', + 'encode_quopri', + ] + +import base64 + +from quopri import encodestring as _encodestring + + + +def _qencode(s): + enc = _encodestring(s, quotetabs=True) + # Must encode spaces, which quopri.encodestring() doesn't do + return enc.replace(' ', '=20') + + +def _bencode(s): + # We can't quite use base64.encodestring() since it tacks on a "courtesy + # newline". Blech! + if not s: + return s + hasnewline = (s[-1] == '\n') + value = base64.encodestring(s) + if not hasnewline and value[-1] == '\n': + return value[:-1] + return value + + + +def encode_base64(msg): + """Encode the message's payload in Base64. + + Also, add an appropriate Content-Transfer-Encoding header. + """ + orig = msg.get_payload() + encdata = _bencode(orig) + msg.set_payload(encdata) + msg['Content-Transfer-Encoding'] = 'base64' + + + +def encode_quopri(msg): + """Encode the message's payload in quoted-printable. + + Also, add an appropriate Content-Transfer-Encoding header. + """ + orig = msg.get_payload() + encdata = _qencode(orig) + msg.set_payload(encdata) + msg['Content-Transfer-Encoding'] = 'quoted-printable' + + + +def encode_7or8bit(msg): + """Set the Content-Transfer-Encoding header to 7bit or 8bit.""" + orig = msg.get_payload() + if orig is None: + # There's no payload. For backwards compatibility we use 7bit + msg['Content-Transfer-Encoding'] = '7bit' + return + # We play a trick to make this go fast. If encoding to ASCII succeeds, we + # know the data must be 7bit, otherwise treat it as 8bit. + try: + orig.encode('ascii') + except UnicodeError: + msg['Content-Transfer-Encoding'] = '8bit' + else: + msg['Content-Transfer-Encoding'] = '7bit' + + + +def encode_noop(msg): + """Do nothing.""" diff --git a/third_party/stdlib/email/errors.py b/third_party/stdlib/email/errors.py new file mode 100644 index 00000000..d52a6246 --- /dev/null +++ b/third_party/stdlib/email/errors.py @@ -0,0 +1,57 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""email package exception classes.""" + + + +class MessageError(Exception): + """Base class for errors in the email package.""" + + +class MessageParseError(MessageError): + """Base class for message parsing errors.""" + + +class HeaderParseError(MessageParseError): + """Error while parsing headers.""" + + +class BoundaryError(MessageParseError): + """Couldn't find terminating boundary.""" + + +class MultipartConversionError(MessageError, TypeError): + """Conversion to a multipart is prohibited.""" + + +class CharsetError(MessageError): + """An illegal charset was given.""" + + + +# These are parsing defects which the parser was able to work around. +class MessageDefect: + """Base class for a message defect.""" + + def __init__(self, line=None): + self.line = line + +class NoBoundaryInMultipartDefect(MessageDefect): + """A message claimed to be a multipart but had no boundary parameter.""" + +class StartBoundaryNotFoundDefect(MessageDefect): + """The claimed start boundary was never found.""" + +class FirstHeaderLineIsContinuationDefect(MessageDefect): + """A message had a continuation line as its first header line.""" + +class MisplacedEnvelopeHeaderDefect(MessageDefect): + """A 'Unix-from' header was found in the middle of a header block.""" + +class MalformedHeaderDefect(MessageDefect): + """Found a header that was missing a colon, or was otherwise malformed.""" + +class MultipartInvariantViolationDefect(MessageDefect): + """A message claimed to be a multipart but no subparts were found.""" diff --git a/third_party/stdlib/email/feedparser.py b/third_party/stdlib/email/feedparser.py new file mode 100644 index 00000000..8031ca66 --- /dev/null +++ b/third_party/stdlib/email/feedparser.py @@ -0,0 +1,505 @@ +# Copyright (C) 2004-2006 Python Software Foundation +# Authors: Baxter, Wouters and Warsaw +# Contact: email-sig@python.org + +"""FeedParser - An email feed parser. + +The feed parser implements an interface for incrementally parsing an email +message, line by line. This has advantages for certain applications, such as +those reading email messages off a socket. + +FeedParser.feed() is the primary interface for pushing new data into the +parser. It returns when there's nothing more it can do with the available +data. When you have no more data to push into the parser, call .close(). +This completes the parsing and returns the root message object. + +The other advantage of this parser is that it will never raise a parsing +exception. Instead, when it finds something unexpected, it adds a 'defect' to +the current message. Defects are just instances that live on the message +object's .defects attribute. +""" + +__all__ = ['FeedParser'] + +import re + +from email import errors +from email import message + +NLCRE = re.compile('\r\n|\r|\n') +NLCRE_bol = re.compile('(\r\n|\r|\n)') +NLCRE_eol = re.compile('(\r\n|\r|\n)\Z') +NLCRE_crack = re.compile('(\r\n|\r|\n)') +# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character +# except controls, SP, and ":". +headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])') +EMPTYSTRING = '' +NL = '\n' + +NeedMoreData = object() + + + +class BufferedSubFile(object): + """A file-ish object that can have new data loaded into it. + + You can also push and pop line-matching predicates onto a stack. When the + current predicate matches the current line, a false EOF response + (i.e. empty string) is returned instead. This lets the parser adhere to a + simple abstraction -- it parses until EOF closes the current message. + """ + def __init__(self): + # Chunks of the last partial line pushed into this object. + self._partial = [] + # The list of full, pushed lines, in reverse order + self._lines = [] + # The stack of false-EOF checking predicates. + self._eofstack = [] + # A flag indicating whether the file has been closed or not. + self._closed = False + + def push_eof_matcher(self, pred): + self._eofstack.append(pred) + + def pop_eof_matcher(self): + return self._eofstack.pop() + + def close(self): + # Don't forget any trailing partial line. + self.pushlines(''.join(self._partial).splitlines(True)) + self._partial = [] + self._closed = True + + def readline(self): + if not self._lines: + if self._closed: + return '' + return NeedMoreData + # Pop the line off the stack and see if it matches the current + # false-EOF predicate. + line = self._lines.pop() + # RFC 2046, section 5.1.2 requires us to recognize outer level + # boundaries at any level of inner nesting. Do this, but be sure it's + # in the order of most to least nested. + for ateof in self._eofstack[::-1]: + if ateof(line): + # We're at the false EOF. But push the last line back first. + self._lines.append(line) + return '' + return line + + def unreadline(self, line): + # Let the consumer push a line back into the buffer. + assert line is not NeedMoreData + self._lines.append(line) + + def push(self, data): + """Push some new data into this object.""" + # Crack into lines, but preserve the linesep characters on the end of each + parts = data.splitlines(True) + + if not parts or not parts[0].endswith(('\n', '\r')): + # No new complete lines, so just accumulate partials + self._partial += parts + return + + if self._partial: + # If there are previous leftovers, complete them now + self._partial.append(parts[0]) + parts[0:1] = ''.join(self._partial).splitlines(True) + del self._partial[:] + + # If the last element of the list does not end in a newline, then treat + # it as a partial line. We only check for '\n' here because a line + # ending with '\r' might be a line that was split in the middle of a + # '\r\n' sequence (see bugs 1555570 and 1721862). + if not parts[-1].endswith('\n'): + self._partial = [parts.pop()] + self.pushlines(parts) + + def pushlines(self, lines): + # Crack into lines, but preserve the newlines on the end of each + parts = NLCRE_crack.split(data) + # The *ahem* interesting behaviour of re.split when supplied grouping + # parentheses is that the last element of the resulting list is the + # data after the final RE. In the case of a NL/CR terminated string, + # this is the empty string. + self._partial = parts.pop() + #GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r: + # is there a \n to follow later? + if not self._partial and parts and parts[-1].endswith('\r'): + self._partial = parts.pop(-2)+parts.pop() + # parts is a list of strings, alternating between the line contents + # and the eol character(s). Gather up a list of lines after + # re-attaching the newlines. + lines = [] + for i in range(len(parts) // 2): + lines.append(parts[i*2] + parts[i*2+1]) + self.pushlines(lines) + + def pushlines(self, lines): + # Reverse and insert at the front of the lines. + self._lines[:0] = lines[::-1] + + def is_closed(self): + return self._closed + + def __iter__(self): + return self + + def next(self): + line = self.readline() + if line == '': + raise StopIteration + return line + + + +class FeedParser: + """A feed-style parser of email.""" + + def __init__(self, _factory=message.Message): + """_factory is called with no arguments to create a new message obj""" + self._factory = _factory + self._input = BufferedSubFile() + self._msgstack = [] + self._parse = self._parsegen().next + self._cur = None + self._last = None + self._headersonly = False + + # Non-public interface for supporting Parser's headersonly flag + def _set_headersonly(self): + self._headersonly = True + + def feed(self, data): + """Push more data into the parser.""" + self._input.push(data) + self._call_parse() + + def _call_parse(self): + try: + self._parse() + except StopIteration: + pass + + def close(self): + """Parse all remaining data and return the root message object.""" + self._input.close() + self._call_parse() + root = self._pop_message() + assert not self._msgstack + # Look for final set of defects + if root.get_content_maintype() == 'multipart' \ + and not root.is_multipart(): + root.defects.append(errors.MultipartInvariantViolationDefect()) + return root + + def _new_message(self): + msg = self._factory() + if self._cur and self._cur.get_content_type() == 'multipart/digest': + msg.set_default_type('message/rfc822') + if self._msgstack: + self._msgstack[-1].attach(msg) + self._msgstack.append(msg) + self._cur = msg + self._last = msg + + def _pop_message(self): + retval = self._msgstack.pop() + if self._msgstack: + self._cur = self._msgstack[-1] + else: + self._cur = None + return retval + + def _parsegen(self): + # Create a new message and start by parsing headers. + self._new_message() + headers = [] + # Collect the headers, searching for a line that doesn't match the RFC + # 2822 header or continuation pattern (including an empty line). + for line in self._input: + if line is NeedMoreData: + yield NeedMoreData + continue + if not headerRE.match(line): + # If we saw the RFC defined header/body separator + # (i.e. newline), just throw it away. Otherwise the line is + # part of the body so push it back. + if not NLCRE.match(line): + self._input.unreadline(line) + break + headers.append(line) + # Done with the headers, so parse them and figure out what we're + # supposed to see in the body of the message. + self._parse_headers(headers) + # Headers-only parsing is a backwards compatibility hack, which was + # necessary in the older parser, which could raise errors. All + # remaining lines in the input are thrown into the message body. + if self._headersonly: + lines = [] + while True: + line = self._input.readline() + if line is NeedMoreData: + yield NeedMoreData + continue + if line == '': + break + lines.append(line) + self._cur.set_payload(EMPTYSTRING.join(lines)) + return + if self._cur.get_content_type() == 'message/delivery-status': + # message/delivery-status contains blocks of headers separated by + # a blank line. We'll represent each header block as a separate + # nested message object, but the processing is a bit different + # than standard message/* types because there is no body for the + # nested messages. A blank line separates the subparts. + while True: + self._input.push_eof_matcher(NLCRE.match) + for retval in self._parsegen(): + if retval is NeedMoreData: + yield NeedMoreData + continue + break + msg = self._pop_message() + # We need to pop the EOF matcher in order to tell if we're at + # the end of the current file, not the end of the last block + # of message headers. + self._input.pop_eof_matcher() + # The input stream must be sitting at the newline or at the + # EOF. We want to see if we're at the end of this subpart, so + # first consume the blank line, then test the next line to see + # if we're at this subpart's EOF. + while True: + line = self._input.readline() + if line is NeedMoreData: + yield NeedMoreData + continue + break + while True: + line = self._input.readline() + if line is NeedMoreData: + yield NeedMoreData + continue + break + if line == '': + break + # Not at EOF so this is a line we're going to need. + self._input.unreadline(line) + return + if self._cur.get_content_maintype() == 'message': + # The message claims to be a message/* type, then what follows is + # another RFC 2822 message. + for retval in self._parsegen(): + if retval is NeedMoreData: + yield NeedMoreData + continue + break + self._pop_message() + return + if self._cur.get_content_maintype() == 'multipart': + boundary = self._cur.get_boundary() + if boundary is None: + # The message /claims/ to be a multipart but it has not + # defined a boundary. That's a problem which we'll handle by + # reading everything until the EOF and marking the message as + # defective. + self._cur.defects.append(errors.NoBoundaryInMultipartDefect()) + lines = [] + for line in self._input: + if line is NeedMoreData: + yield NeedMoreData + continue + lines.append(line) + self._cur.set_payload(EMPTYSTRING.join(lines)) + return + # Create a line match predicate which matches the inter-part + # boundary as well as the end-of-multipart boundary. Don't push + # this onto the input stream until we've scanned past the + # preamble. + separator = '--' + boundary + boundaryre = re.compile( + '(?P' + re.escape(separator) + + r')(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') + capturing_preamble = True + preamble = [] + linesep = False + while True: + line = self._input.readline() + if line is NeedMoreData: + yield NeedMoreData + continue + if line == '': + break + mo = boundaryre.match(line) + if mo: + # If we're looking at the end boundary, we're done with + # this multipart. If there was a newline at the end of + # the closing boundary, then we need to initialize the + # epilogue with the empty string (see below). + if mo.group('end'): + linesep = mo.group('linesep') + break + # We saw an inter-part boundary. Were we in the preamble? + if capturing_preamble: + if preamble: + # According to RFC 2046, the last newline belongs + # to the boundary. + lastline = preamble[-1] + eolmo = NLCRE_eol.search(lastline) + if eolmo: + preamble[-1] = lastline[:-len(eolmo.group(0))] + self._cur.preamble = EMPTYSTRING.join(preamble) + capturing_preamble = False + self._input.unreadline(line) + continue + # We saw a boundary separating two parts. Consume any + # multiple boundary lines that may be following. Our + # interpretation of RFC 2046 BNF grammar does not produce + # body parts within such double boundaries. + while True: + line = self._input.readline() + if line is NeedMoreData: + yield NeedMoreData + continue + mo = boundaryre.match(line) + if not mo: + self._input.unreadline(line) + break + # Recurse to parse this subpart; the input stream points + # at the subpart's first line. + self._input.push_eof_matcher(boundaryre.match) + for retval in self._parsegen(): + if retval is NeedMoreData: + yield NeedMoreData + continue + break + # Because of RFC 2046, the newline preceding the boundary + # separator actually belongs to the boundary, not the + # previous subpart's payload (or epilogue if the previous + # part is a multipart). + if self._last.get_content_maintype() == 'multipart': + epilogue = self._last.epilogue + if epilogue == '': + self._last.epilogue = None + elif epilogue is not None: + mo = NLCRE_eol.search(epilogue) + if mo: + end = len(mo.group(0)) + self._last.epilogue = epilogue[:-end] + else: + payload = self._last.get_payload() + if isinstance(payload, basestring): + mo = NLCRE_eol.search(payload) + if mo: + payload = payload[:-len(mo.group(0))] + self._last.set_payload(payload) + self._input.pop_eof_matcher() + self._pop_message() + # Set the multipart up for newline cleansing, which will + # happen if we're in a nested multipart. + self._last = self._cur + else: + # I think we must be in the preamble + assert capturing_preamble + preamble.append(line) + # We've seen either the EOF or the end boundary. If we're still + # capturing the preamble, we never saw the start boundary. Note + # that as a defect and store the captured text as the payload. + # Everything from here to the EOF is epilogue. + if capturing_preamble: + self._cur.defects.append(errors.StartBoundaryNotFoundDefect()) + self._cur.set_payload(EMPTYSTRING.join(preamble)) + epilogue = [] + for line in self._input: + if line is NeedMoreData: + yield NeedMoreData + continue + self._cur.epilogue = EMPTYSTRING.join(epilogue) + return + # If the end boundary ended in a newline, we'll need to make sure + # the epilogue isn't None + if linesep: + epilogue = [''] + else: + epilogue = [] + for line in self._input: + if line is NeedMoreData: + yield NeedMoreData + continue + epilogue.append(line) + # Any CRLF at the front of the epilogue is not technically part of + # the epilogue. Also, watch out for an empty string epilogue, + # which means a single newline. + if epilogue: + firstline = epilogue[0] + bolmo = NLCRE_bol.match(firstline) + if bolmo: + epilogue[0] = firstline[len(bolmo.group(0)):] + self._cur.epilogue = EMPTYSTRING.join(epilogue) + return + # Otherwise, it's some non-multipart type, so the entire rest of the + # file contents becomes the payload. + lines = [] + for line in self._input: + if line is NeedMoreData: + yield NeedMoreData + continue + lines.append(line) + self._cur.set_payload(EMPTYSTRING.join(lines)) + + def _parse_headers(self, lines): + # Passed a list of lines that make up the headers for the current msg + lastheader = '' + lastvalue = [] + for lineno, line in enumerate(lines): + # Check for continuation + if line[0] in ' \t': + if not lastheader: + # The first line of the headers was a continuation. This + # is illegal, so let's note the defect, store the illegal + # line, and ignore it for purposes of headers. + defect = errors.FirstHeaderLineIsContinuationDefect(line) + self._cur.defects.append(defect) + continue + lastvalue.append(line) + continue + if lastheader: + # XXX reconsider the joining of folded lines + lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n') + self._cur[lastheader] = lhdr + lastheader, lastvalue = '', [] + # Check for envelope header, i.e. unix-from + if line.startswith('From '): + if lineno == 0: + # Strip off the trailing newline + mo = NLCRE_eol.search(line) + if mo: + line = line[:-len(mo.group(0))] + self._cur.set_unixfrom(line) + continue + elif lineno == len(lines) - 1: + # Something looking like a unix-from at the end - it's + # probably the first line of the body, so push back the + # line and stop. + self._input.unreadline(line) + return + else: + # Weirdly placed unix-from line. Note this as a defect + # and ignore it. + defect = errors.MisplacedEnvelopeHeaderDefect(line) + self._cur.defects.append(defect) + continue + # Split the line on the colon separating field name from value. + i = line.find(':') + if i < 0: + defect = errors.MalformedHeaderDefect(line) + self._cur.defects.append(defect) + continue + lastheader = line[:i] + lastvalue = [line[i+1:].lstrip()] + # Done with all the lines, so handle the last header. + if lastheader: + # XXX reconsider the joining of folded lines + self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n') diff --git a/third_party/stdlib/email/generator.py b/third_party/stdlib/email/generator.py new file mode 100644 index 00000000..e50f912c --- /dev/null +++ b/third_party/stdlib/email/generator.py @@ -0,0 +1,371 @@ +# Copyright (C) 2001-2010 Python Software Foundation +# Contact: email-sig@python.org + +"""Classes to generate plain text from a message object tree.""" + +__all__ = ['Generator', 'DecodedGenerator'] + +import re +import sys +import time +import random +import warnings + +from cStringIO import StringIO +from email.header import Header + +UNDERSCORE = '_' +NL = '\n' + +fcre = re.compile(r'^From ', re.MULTILINE) + +def _is8bitstring(s): + if isinstance(s, str): + try: + unicode(s, 'us-ascii') + except UnicodeError: + return True + return False + + + +class Generator: + """Generates output from a Message object tree. + + This basic generator writes the message to the given file object as plain + text. + """ + # + # Public interface + # + + def __init__(self, outfp, mangle_from_=True, maxheaderlen=78): + """Create the generator for message flattening. + + outfp is the output file-like object for writing the message to. It + must have a write() method. + + Optional mangle_from_ is a flag that, when True (the default), escapes + From_ lines in the body of the message by putting a `>' in front of + them. + + Optional maxheaderlen specifies the longest length for a non-continued + header. When a header line is longer (in characters, with tabs + expanded to 8 spaces) than maxheaderlen, the header will split as + defined in the Header class. Set maxheaderlen to zero to disable + header wrapping. The default is 78, as recommended (but not required) + by RFC 2822. + """ + self._fp = outfp + self._mangle_from_ = mangle_from_ + self._maxheaderlen = maxheaderlen + + def write(self, s): + # Just delegate to the file object + self._fp.write(s) + + def flatten(self, msg, unixfrom=False): + """Print the message object tree rooted at msg to the output file + specified when the Generator instance was created. + + unixfrom is a flag that forces the printing of a Unix From_ delimiter + before the first object in the message tree. If the original message + has no From_ delimiter, a `standard' one is crafted. By default, this + is False to inhibit the printing of any From_ delimiter. + + Note that for subobjects, no From_ line is printed. + """ + if unixfrom: + ufrom = msg.get_unixfrom() + if not ufrom: + ufrom = 'From nobody ' + time.ctime(time.time()) + print >> self._fp, ufrom + self._write(msg) + + def clone(self, fp): + """Clone this generator with the exact same options.""" + return self.__class__(fp, self._mangle_from_, self._maxheaderlen) + + # + # Protected interface - undocumented ;/ + # + + def _write(self, msg): + # We can't write the headers yet because of the following scenario: + # say a multipart message includes the boundary string somewhere in + # its body. We'd have to calculate the new boundary /before/ we write + # the headers so that we can write the correct Content-Type: + # parameter. + # + # The way we do this, so as to make the _handle_*() methods simpler, + # is to cache any subpart writes into a StringIO. The we write the + # headers and the StringIO contents. That way, subpart handlers can + # Do The Right Thing, and can still modify the Content-Type: header if + # necessary. + oldfp = self._fp + try: + self._fp = sfp = StringIO() + self._dispatch(msg) + finally: + self._fp = oldfp + # Write the headers. First we see if the message object wants to + # handle that itself. If not, we'll do it generically. + meth = getattr(msg, '_write_headers', None) + if meth is None: + self._write_headers(msg) + else: + meth(self) + self._fp.write(sfp.getvalue()) + + def _dispatch(self, msg): + # Get the Content-Type: for the message, then try to dispatch to + # self._handle__(). If there's no handler for the + # full MIME type, then dispatch to self._handle_(). If + # that's missing too, then dispatch to self._writeBody(). + main = msg.get_content_maintype() + sub = msg.get_content_subtype() + specific = UNDERSCORE.join((main, sub)).replace('-', '_') + meth = getattr(self, '_handle_' + specific, None) + if meth is None: + generic = main.replace('-', '_') + meth = getattr(self, '_handle_' + generic, None) + if meth is None: + meth = self._writeBody + meth(msg) + + # + # Default handlers + # + + def _write_headers(self, msg): + for h, v in msg.items(): + print >> self._fp, '%s:' % h, + if self._maxheaderlen == 0: + # Explicit no-wrapping + print >> self._fp, v + elif isinstance(v, Header): + # Header instances know what to do + print >> self._fp, v.encode() + elif _is8bitstring(v): + # If we have raw 8bit data in a byte string, we have no idea + # what the encoding is. There is no safe way to split this + # string. If it's ascii-subset, then we could do a normal + # ascii split, but if it's multibyte then we could break the + # string. There's no way to know so the least harm seems to + # be to not split the string and risk it being too long. + print >> self._fp, v + else: + # Header's got lots of smarts, so use it. Note that this is + # fundamentally broken though because we lose idempotency when + # the header string is continued with tabs. It will now be + # continued with spaces. This was reversedly broken before we + # fixed bug 1974. Either way, we lose. + print >> self._fp, Header( + v, maxlinelen=self._maxheaderlen, header_name=h).encode() + # A blank line always separates headers from body + print >> self._fp + + # + # Handlers for writing types and subtypes + # + + def _handle_text(self, msg): + payload = msg.get_payload() + if payload is None: + return + if not isinstance(payload, basestring): + raise TypeError('string payload expected: %s' % type(payload)) + if self._mangle_from_: + payload = fcre.sub('>From ', payload) + self._fp.write(payload) + + # Default body handler + _writeBody = _handle_text + + def _handle_multipart(self, msg): + # The trick here is to write out each part separately, merge them all + # together, and then make sure that the boundary we've chosen isn't + # present in the payload. + msgtexts = [] + subparts = msg.get_payload() + if subparts is None: + subparts = [] + elif isinstance(subparts, basestring): + # e.g. a non-strict parse of a message with no starting boundary. + self._fp.write(subparts) + return + elif not isinstance(subparts, list): + # Scalar payload + subparts = [subparts] + for part in subparts: + s = StringIO() + g = self.clone(s) + g.flatten(part, unixfrom=False) + msgtexts.append(s.getvalue()) + # BAW: What about boundaries that are wrapped in double-quotes? + boundary = msg.get_boundary() + if not boundary: + # Create a boundary that doesn't appear in any of the + # message texts. + alltext = NL.join(msgtexts) + boundary = _make_boundary(alltext) + msg.set_boundary(boundary) + # If there's a preamble, write it out, with a trailing CRLF + if msg.preamble is not None: + if self._mangle_from_: + preamble = fcre.sub('>From ', msg.preamble) + else: + preamble = msg.preamble + print >> self._fp, preamble + # dash-boundary transport-padding CRLF + print >> self._fp, '--' + boundary + # body-part + if msgtexts: + self._fp.write(msgtexts.pop(0)) + # *encapsulation + # --> delimiter transport-padding + # --> CRLF body-part + for body_part in msgtexts: + # delimiter transport-padding CRLF + print >> self._fp, '\n--' + boundary + # body-part + self._fp.write(body_part) + # close-delimiter transport-padding + self._fp.write('\n--' + boundary + '--' + NL) + if msg.epilogue is not None: + if self._mangle_from_: + epilogue = fcre.sub('>From ', msg.epilogue) + else: + epilogue = msg.epilogue + self._fp.write(epilogue) + + def _handle_multipart_signed(self, msg): + # The contents of signed parts has to stay unmodified in order to keep + # the signature intact per RFC1847 2.1, so we disable header wrapping. + # RDM: This isn't enough to completely preserve the part, but it helps. + old_maxheaderlen = self._maxheaderlen + try: + self._maxheaderlen = 0 + self._handle_multipart(msg) + finally: + self._maxheaderlen = old_maxheaderlen + + def _handle_message_delivery_status(self, msg): + # We can't just write the headers directly to self's file object + # because this will leave an extra newline between the last header + # block and the boundary. Sigh. + blocks = [] + for part in msg.get_payload(): + s = StringIO() + g = self.clone(s) + g.flatten(part, unixfrom=False) + text = s.getvalue() + lines = text.split('\n') + # Strip off the unnecessary trailing empty line + if lines and lines[-1] == '': + blocks.append(NL.join(lines[:-1])) + else: + blocks.append(text) + # Now join all the blocks with an empty line. This has the lovely + # effect of separating each block with an empty line, but not adding + # an extra one after the last one. + self._fp.write(NL.join(blocks)) + + def _handle_message(self, msg): + s = StringIO() + g = self.clone(s) + # The payload of a message/rfc822 part should be a multipart sequence + # of length 1. The zeroth element of the list should be the Message + # object for the subpart. Extract that object, stringify it, and + # write it out. + # Except, it turns out, when it's a string instead, which happens when + # and only when HeaderParser is used on a message of mime type + # message/rfc822. Such messages are generated by, for example, + # Groupwise when forwarding unadorned messages. (Issue 7970.) So + # in that case we just emit the string body. + payload = msg.get_payload() + if isinstance(payload, list): + g.flatten(msg.get_payload(0), unixfrom=False) + payload = s.getvalue() + self._fp.write(payload) + + + +_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' + +class DecodedGenerator(Generator): + """Generates a text representation of a message. + + Like the Generator base class, except that non-text parts are substituted + with a format string representing the part. + """ + def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None): + """Like Generator.__init__() except that an additional optional + argument is allowed. + + Walks through all subparts of a message. If the subpart is of main + type `text', then it prints the decoded payload of the subpart. + + Otherwise, fmt is a format string that is used instead of the message + payload. fmt is expanded with the following keywords (in + %(keyword)s format): + + type : Full MIME type of the non-text part + maintype : Main MIME type of the non-text part + subtype : Sub-MIME type of the non-text part + filename : Filename of the non-text part + description: Description associated with the non-text part + encoding : Content transfer encoding of the non-text part + + The default value for fmt is None, meaning + + [Non-text (%(type)s) part of message omitted, filename %(filename)s] + """ + Generator.__init__(self, outfp, mangle_from_, maxheaderlen) + if fmt is None: + self._fmt = _FMT + else: + self._fmt = fmt + + def _dispatch(self, msg): + for part in msg.walk(): + maintype = part.get_content_maintype() + if maintype == 'text': + print >> self, part.get_payload(decode=True) + elif maintype == 'multipart': + # Just skip this + pass + else: + print >> self, self._fmt % { + 'type' : part.get_content_type(), + 'maintype' : part.get_content_maintype(), + 'subtype' : part.get_content_subtype(), + 'filename' : part.get_filename('[no filename]'), + 'description': part.get('Content-Description', + '[no description]'), + 'encoding' : part.get('Content-Transfer-Encoding', + '[no encoding]'), + } + + + +# Helper +_width = len(repr(sys.maxint-1)) +_fmt = '%%0%dd' % _width + +def _make_boundary(text=None): + # Craft a random boundary. If text is given, ensure that the chosen + # boundary doesn't appear in the text. + token = random.randrange(sys.maxint) + boundary = ('=' * 15) + (_fmt % token) + '==' + if text is None: + return boundary + b = boundary + counter = 0 + while True: + cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE) + if not cre.search(text): + break + b = boundary + '.' + str(counter) + counter += 1 + return b diff --git a/third_party/stdlib/email/header.py b/third_party/stdlib/email/header.py new file mode 100644 index 00000000..2cf870fd --- /dev/null +++ b/third_party/stdlib/email/header.py @@ -0,0 +1,514 @@ +# Copyright (C) 2002-2006 Python Software Foundation +# Author: Ben Gertzfield, Barry Warsaw +# Contact: email-sig@python.org + +"""Header encoding and decoding functionality.""" + +__all__ = [ + 'Header', + 'decode_header', + 'make_header', + ] + +import re +import binascii + +import email.quoprimime +import email.base64mime + +from email.errors import HeaderParseError +from email.charset import Charset + +NL = '\n' +SPACE = ' ' +USPACE = u' ' +SPACE8 = ' ' * 8 +UEMPTYSTRING = u'' + +MAXLINELEN = 76 + +USASCII = Charset('us-ascii') +UTF8 = Charset('utf-8') + +# Match encoded-word strings in the form =?charset?q?Hello_World?= +ecre = re.compile(r''' + =\? # literal =? + (?P[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P.*?) # non-greedy up to the next ?= is the encoded string + \?= # literal ?= + (?=[ \t]|$) # whitespace or the end of the string + ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) + +# Field name regexp, including trailing colon, but not separating whitespace, +# according to RFC 2822. Character range is from tilde to exclamation mark. +# For use with .match() +fcre = re.compile(r'[\041-\176]+:$') + +# Find a header embedded in a putative header value. Used to check for +# header injection attack. +_embeded_header = re.compile(r'\n[^ \t]+:') + + + +# Helpers +_max_append = email.quoprimime._max_append + + + +def decode_header(header): + """Decode a message header value without converting charset. + + Returns a list of (decoded_string, charset) pairs containing each of the + decoded parts of the header. Charset is None for non-encoded parts of the + header, otherwise a lower-case string containing the name of the character + set specified in the encoded string. + + An email.errors.HeaderParseError may be raised when certain decoding error + occurs (e.g. a base64 decoding exception). + """ + # If no encoding, just return the header + header = str(header) + if not ecre.search(header): + return [(header, None)] + decoded = [] + dec = '' + for line in header.splitlines(): + # This line might not have an encoding in it + if not ecre.search(line): + decoded.append((line, None)) + continue + parts = ecre.split(line) + while parts: + unenc = parts.pop(0).strip() + if unenc: + # Should we continue a long line? + if decoded and decoded[-1][1] is None: + decoded[-1] = (decoded[-1][0] + SPACE + unenc, None) + else: + decoded.append((unenc, None)) + if parts: + charset, encoding = [s.lower() for s in parts[0:2]] + encoded = parts[2] + dec = None + if encoding == 'q': + dec = email.quoprimime.header_decode(encoded) + elif encoding == 'b': + paderr = len(encoded) % 4 # Postel's law: add missing padding + if paderr: + encoded += '==='[:4 - paderr] + try: + dec = email.base64mime.decode(encoded) + except binascii.Error: + # Turn this into a higher level exception. BAW: Right + # now we throw the lower level exception away but + # when/if we get exception chaining, we'll preserve it. + raise HeaderParseError + if dec is None: + dec = encoded + + if decoded and decoded[-1][1] == charset: + decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1]) + else: + decoded.append((dec, charset)) + del parts[0:3] + return decoded + + + +def make_header(decoded_seq, maxlinelen=None, header_name=None, + continuation_ws=' '): + """Create a Header from a sequence of pairs as returned by decode_header() + + decode_header() takes a header value string and returns a sequence of + pairs of the format (decoded_string, charset) where charset is the string + name of the character set. + + This function takes one of those sequence of pairs and returns a Header + instance. Optional maxlinelen, header_name, and continuation_ws are as in + the Header constructor. + """ + h = Header(maxlinelen=maxlinelen, header_name=header_name, + continuation_ws=continuation_ws) + for s, charset in decoded_seq: + # None means us-ascii but we can simply pass it on to h.append() + if charset is not None and not isinstance(charset, Charset): + charset = Charset(charset) + h.append(s, charset) + return h + + + +class Header: + def __init__(self, s=None, charset=None, + maxlinelen=None, header_name=None, + continuation_ws=' ', errors='strict'): + """Create a MIME-compliant header that can contain many character sets. + + Optional s is the initial header value. If None, the initial header + value is not set. You can later append to the header with .append() + method calls. s may be a byte string or a Unicode string, but see the + .append() documentation for semantics. + + Optional charset serves two purposes: it has the same meaning as the + charset argument to the .append() method. It also sets the default + character set for all subsequent .append() calls that omit the charset + argument. If charset is not provided in the constructor, the us-ascii + charset is used both as s's initial charset and as the default for + subsequent .append() calls. + + The maximum line length can be specified explicit via maxlinelen. For + splitting the first line to a shorter value (to account for the field + header which isn't included in s, e.g. `Subject') pass in the name of + the field in header_name. The default maxlinelen is 76. + + continuation_ws must be RFC 2822 compliant folding whitespace (usually + either a space or a hard tab) which will be prepended to continuation + lines. + + errors is passed through to the .append() call. + """ + if charset is None: + charset = USASCII + if not isinstance(charset, Charset): + charset = Charset(charset) + self._charset = charset + self._continuation_ws = continuation_ws + cws_expanded_len = len(continuation_ws.replace('\t', SPACE8)) + # BAW: I believe `chunks' and `maxlinelen' should be non-public. + self._chunks = [] + if s is not None: + self.append(s, charset, errors) + if maxlinelen is None: + maxlinelen = MAXLINELEN + if header_name is None: + # We don't know anything about the field header so the first line + # is the same length as subsequent lines. + self._firstlinelen = maxlinelen + else: + # The first line should be shorter to take into account the field + # header. Also subtract off 2 extra for the colon and space. + self._firstlinelen = maxlinelen - len(header_name) - 2 + # Second and subsequent lines should subtract off the length in + # columns of the continuation whitespace prefix. + self._maxlinelen = maxlinelen - cws_expanded_len + + def __str__(self): + """A synonym for self.encode().""" + return self.encode() + + def __unicode__(self): + """Helper for the built-in unicode function.""" + uchunks = [] + lastcs = None + for s, charset in self._chunks: + # We must preserve spaces between encoded and non-encoded word + # boundaries, which means for us we need to add a space when we go + # from a charset to None/us-ascii, or from None/us-ascii to a + # charset. Only do this for the second and subsequent chunks. + nextcs = charset + if uchunks: + if lastcs not in (None, 'us-ascii'): + if nextcs in (None, 'us-ascii'): + uchunks.append(USPACE) + nextcs = None + elif nextcs not in (None, 'us-ascii'): + uchunks.append(USPACE) + lastcs = nextcs + uchunks.append(unicode(s, str(charset))) + return UEMPTYSTRING.join(uchunks) + + # Rich comparison operators for equality only. BAW: does it make sense to + # have or explicitly disable <, <=, >, >= operators? + def __eq__(self, other): + # other may be a Header or a string. Both are fine so coerce + # ourselves to a string, swap the args and do another comparison. + return other == self.encode() + + def __ne__(self, other): + return not self == other + + def append(self, s, charset=None, errors='strict'): + """Append a string to the MIME header. + + Optional charset, if given, should be a Charset instance or the name + of a character set (which will be converted to a Charset instance). A + value of None (the default) means that the charset given in the + constructor is used. + + s may be a byte string or a Unicode string. If it is a byte string + (i.e. isinstance(s, str) is true), then charset is the encoding of + that byte string, and a UnicodeError will be raised if the string + cannot be decoded with that charset. If s is a Unicode string, then + charset is a hint specifying the character set of the characters in + the string. In this case, when producing an RFC 2822 compliant header + using RFC 2047 rules, the Unicode string will be encoded using the + following charsets in order: us-ascii, the charset hint, utf-8. The + first character set not to provoke a UnicodeError is used. + + Optional `errors' is passed as the third argument to any unicode() or + ustr.encode() call. + """ + if charset is None: + charset = self._charset + elif not isinstance(charset, Charset): + charset = Charset(charset) + # If the charset is our faux 8bit charset, leave the string unchanged + if charset != '8bit': + # We need to test that the string can be converted to unicode and + # back to a byte string, given the input and output codecs of the + # charset. + if isinstance(s, str): + # Possibly raise UnicodeError if the byte string can't be + # converted to a unicode with the input codec of the charset. + incodec = charset.input_codec or 'us-ascii' + ustr = unicode(s, incodec, errors) + # Now make sure that the unicode could be converted back to a + # byte string with the output codec, which may be different + # than the iput coded. Still, use the original byte string. + outcodec = charset.output_codec or 'us-ascii' + ustr.encode(outcodec, errors) + elif isinstance(s, unicode): + # Now we have to be sure the unicode string can be converted + # to a byte string with a reasonable output codec. We want to + # use the byte string in the chunk. + for charset in USASCII, charset, UTF8: + try: + outcodec = charset.output_codec or 'us-ascii' + s = s.encode(outcodec, errors) + break + except UnicodeError: + pass + else: + assert False, 'utf-8 conversion failed' + self._chunks.append((s, charset)) + + def _split(self, s, charset, maxlinelen, splitchars): + # Split up a header safely for use with encode_chunks. + splittable = charset.to_splittable(s) + encoded = charset.from_splittable(splittable, True) + elen = charset.encoded_header_len(encoded) + # If the line's encoded length first, just return it + if elen <= maxlinelen: + return [(encoded, charset)] + # If we have undetermined raw 8bit characters sitting in a byte + # string, we really don't know what the right thing to do is. We + # can't really split it because it might be multibyte data which we + # could break if we split it between pairs. The least harm seems to + # be to not split the header at all, but that means they could go out + # longer than maxlinelen. + if charset == '8bit': + return [(s, charset)] + # BAW: I'm not sure what the right test here is. What we're trying to + # do is be faithful to RFC 2822's recommendation that ($2.2.3): + # + # "Note: Though structured field bodies are defined in such a way that + # folding can take place between many of the lexical tokens (and even + # within some of the lexical tokens), folding SHOULD be limited to + # placing the CRLF at higher-level syntactic breaks." + # + # For now, I can only imagine doing this when the charset is us-ascii, + # although it's possible that other charsets may also benefit from the + # higher-level syntactic breaks. + elif charset == 'us-ascii': + return self._split_ascii(s, charset, maxlinelen, splitchars) + # BAW: should we use encoded? + elif elen == len(s): + # We can split on _maxlinelen boundaries because we know that the + # encoding won't change the size of the string + splitpnt = maxlinelen + first = charset.from_splittable(splittable[:splitpnt], False) + last = charset.from_splittable(splittable[splitpnt:], False) + else: + # Binary search for split point + first, last = _binsplit(splittable, charset, maxlinelen) + # first is of the proper length so just wrap it in the appropriate + # chrome. last must be recursively split. + fsplittable = charset.to_splittable(first) + fencoded = charset.from_splittable(fsplittable, True) + chunk = [(fencoded, charset)] + return chunk + self._split(last, charset, self._maxlinelen, splitchars) + + def _split_ascii(self, s, charset, firstlen, splitchars): + chunks = _split_ascii(s, firstlen, self._maxlinelen, + self._continuation_ws, splitchars) + return zip(chunks, [charset]*len(chunks)) + + def _encode_chunks(self, newchunks, maxlinelen): + # MIME-encode a header with many different charsets and/or encodings. + # + # Given a list of pairs (string, charset), return a MIME-encoded + # string suitable for use in a header field. Each pair may have + # different charsets and/or encodings, and the resulting header will + # accurately reflect each setting. + # + # Each encoding can be email.utils.QP (quoted-printable, for + # ASCII-like character sets like iso-8859-1), email.utils.BASE64 + # (Base64, for non-ASCII like character sets like KOI8-R and + # iso-2022-jp), or None (no encoding). + # + # Each pair will be represented on a separate line; the resulting + # string will be in the format: + # + # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n + # =?charset2?b?SvxyZ2VuIEL2aW5n?=" + chunks = [] + for header, charset in newchunks: + if not header: + continue + if charset is None or charset.header_encoding is None: + s = header + else: + s = charset.header_encode(header) + # Don't add more folding whitespace than necessary + if chunks and chunks[-1].endswith(' '): + extra = '' + else: + extra = ' ' + _max_append(chunks, s, maxlinelen, extra) + joiner = NL + self._continuation_ws + return joiner.join(chunks) + + def encode(self, splitchars=';, '): + """Encode a message header into an RFC-compliant format. + + There are many issues involved in converting a given string for use in + an email header. Only certain character sets are readable in most + email clients, and as header strings can only contain a subset of + 7-bit ASCII, care must be taken to properly convert and encode (with + Base64 or quoted-printable) header strings. In addition, there is a + 75-character length limit on any given encoded header field, so + line-wrapping must be performed, even with double-byte character sets. + + This method will do its best to convert the string to the correct + character set used in email, and encode and line wrap it safely with + the appropriate scheme for that character set. + + If the given charset is not known or an error occurs during + conversion, this function will return the header untouched. + + Optional splitchars is a string containing characters to split long + ASCII lines on, in rough support of RFC 2822's `highest level + syntactic breaks'. This doesn't affect RFC 2047 encoded lines. + """ + newchunks = [] + maxlinelen = self._firstlinelen + lastlen = 0 + for s, charset in self._chunks: + # The first bit of the next chunk should be just long enough to + # fill the next line. Don't forget the space separating the + # encoded words. + targetlen = maxlinelen - lastlen - 1 + if targetlen < charset.encoded_header_len(''): + # Stick it on the next line + targetlen = maxlinelen + newchunks += self._split(s, charset, targetlen, splitchars) + lastchunk, lastcharset = newchunks[-1] + lastlen = lastcharset.encoded_header_len(lastchunk) + value = self._encode_chunks(newchunks, maxlinelen) + if _embeded_header.search(value): + raise HeaderParseError("header value appears to contain " + "an embedded header: {!r}".format(value)) + return value + + + +def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars): + lines = [] + maxlen = firstlen + for line in s.splitlines(): + # Ignore any leading whitespace (i.e. continuation whitespace) already + # on the line, since we'll be adding our own. + line = line.lstrip() + if len(line) < maxlen: + lines.append(line) + maxlen = restlen + continue + # Attempt to split the line at the highest-level syntactic break + # possible. Note that we don't have a lot of smarts about field + # syntax; we just try to break on semi-colons, then commas, then + # whitespace. + for ch in splitchars: + if ch in line: + break + else: + # There's nothing useful to split the line on, not even spaces, so + # just append this line unchanged + lines.append(line) + maxlen = restlen + continue + # Now split the line on the character plus trailing whitespace + cre = re.compile(r'%s\s*' % ch) + if ch in ';,': + eol = ch + else: + eol = '' + joiner = eol + ' ' + joinlen = len(joiner) + wslen = len(continuation_ws.replace('\t', SPACE8)) + this = [] + linelen = 0 + for part in cre.split(line): + curlen = linelen + max(0, len(this)-1) * joinlen + partlen = len(part) + onfirstline = not lines + # We don't want to split after the field name, if we're on the + # first line and the field name is present in the header string. + if ch == ' ' and onfirstline and \ + len(this) == 1 and fcre.match(this[0]): + this.append(part) + linelen += partlen + elif curlen + partlen > maxlen: + if this: + lines.append(joiner.join(this) + eol) + # If this part is longer than maxlen and we aren't already + # splitting on whitespace, try to recursively split this line + # on whitespace. + if partlen > maxlen and ch != ' ': + subl = _split_ascii(part, maxlen, restlen, + continuation_ws, ' ') + lines.extend(subl[:-1]) + this = [subl[-1]] + else: + this = [part] + linelen = wslen + len(this[-1]) + maxlen = restlen + else: + this.append(part) + linelen += partlen + # Put any left over parts on a line by themselves + if this: + lines.append(joiner.join(this)) + return lines + + + +def _binsplit(splittable, charset, maxlinelen): + i = 0 + j = len(splittable) + while i < j: + # Invariants: + # 1. splittable[:k] fits for all k <= i (note that we *assume*, + # at the start, that splittable[:0] fits). + # 2. splittable[:k] does not fit for any k > j (at the start, + # this means we shouldn't look at any k > len(splittable)). + # 3. We don't know about splittable[:k] for k in i+1..j. + # 4. We want to set i to the largest k that fits, with i <= k <= j. + # + m = (i+j+1) >> 1 # ceiling((i+j)/2); i < m <= j + chunk = charset.from_splittable(splittable[:m], True) + chunklen = charset.encoded_header_len(chunk) + if chunklen <= maxlinelen: + # m is acceptable, so is a new lower bound. + i = m + else: + # m is not acceptable, so final i must be < m. + j = m - 1 + # i == j. Invariant #1 implies that splittable[:i] fits, and + # invariant #2 implies that splittable[:i+1] does not fit, so i + # is what we're looking for. + first = charset.from_splittable(splittable[:i], False) + last = charset.from_splittable(splittable[i:], False) + return first, last diff --git a/third_party/stdlib/email/iterators.py b/third_party/stdlib/email/iterators.py new file mode 100644 index 00000000..e99f2280 --- /dev/null +++ b/third_party/stdlib/email/iterators.py @@ -0,0 +1,73 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Various types of useful iterators and generators.""" + +__all__ = [ + 'body_line_iterator', + 'typed_subpart_iterator', + 'walk', + # Do not include _structure() since it's part of the debugging API. + ] + +import sys +from cStringIO import StringIO + + + +# This function will become a method of the Message class +def walk(self): + """Walk over the message tree, yielding each subpart. + + The walk is performed in depth-first order. This method is a + generator. + """ + yield self + if self.is_multipart(): + for subpart in self.get_payload(): + for subsubpart in subpart.walk(): + yield subsubpart + + + +# These two functions are imported into the Iterators.py interface module. +def body_line_iterator(msg, decode=False): + """Iterate over the parts, returning string payloads line-by-line. + + Optional decode (default False) is passed through to .get_payload(). + """ + for subpart in msg.walk(): + payload = subpart.get_payload(decode=decode) + if isinstance(payload, basestring): + for line in StringIO(payload): + yield line + + +def typed_subpart_iterator(msg, maintype='text', subtype=None): + """Iterate over the subparts with a given MIME type. + + Use `maintype' as the main MIME type to match against; this defaults to + "text". Optional `subtype' is the MIME subtype to match against; if + omitted, only the main type is matched. + """ + for subpart in msg.walk(): + if subpart.get_content_maintype() == maintype: + if subtype is None or subpart.get_content_subtype() == subtype: + yield subpart + + + +def _structure(msg, fp=None, level=0, include_default=False): + """A handy debugging aid""" + if fp is None: + fp = sys.stdout + tab = ' ' * (level * 4) + print >> fp, tab + msg.get_content_type(), + if include_default: + print >> fp, '[%s]' % msg.get_default_type() + else: + print >> fp + if msg.is_multipart(): + for subpart in msg.get_payload(): + _structure(subpart, fp, level+1, include_default) diff --git a/third_party/stdlib/email/message.py b/third_party/stdlib/email/message.py new file mode 100644 index 00000000..d7358cd6 --- /dev/null +++ b/third_party/stdlib/email/message.py @@ -0,0 +1,797 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Basic message object for the email package object model.""" + +__all__ = ['Message'] + +import re +import uu +import binascii +import warnings +from cStringIO import StringIO + +# Intrapackage imports +import email.charset +from email import utils +from email import errors + +SEMISPACE = '; ' + +# Regular expression that matches `special' characters in parameters, the +# existence of which force quoting of the parameter value. +tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') + + +# Helper functions +def _splitparam(param): + # Split header parameters. BAW: this may be too simple. It isn't + # strictly RFC 2045 (section 5.1) compliant, but it catches most headers + # found in the wild. We may eventually need a full fledged parser + # eventually. + a, sep, b = param.partition(';') + if not sep: + return a.strip(), None + return a.strip(), b.strip() + +def _formatparam(param, value=None, quote=True): + """Convenience function to format and return a key=value pair. + + This will quote the value if needed or if quote is true. If value is a + three tuple (charset, language, value), it will be encoded according + to RFC2231 rules. + """ + if value is not None and len(value) > 0: + # A tuple is used for RFC 2231 encoded parameter values where items + # are (charset, language, value). charset is a string, not a Charset + # instance. + if isinstance(value, tuple): + # Encode as per RFC 2231 + param += '*' + value = utils.encode_rfc2231(value[2], value[0], value[1]) + # BAW: Please check this. I think that if quote is set it should + # force quoting even if not necessary. + if quote or tspecials.search(value): + return '%s="%s"' % (param, utils.quote(value)) + else: + return '%s=%s' % (param, value) + else: + return param + +def _parseparam(s): + plist = [] + while s[:1] == ';': + s = s[1:] + end = s.find(';') + while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: + end = s.find(';', end + 1) + if end < 0: + end = len(s) + f = s[:end] + if '=' in f: + i = f.index('=') + f = f[:i].strip().lower() + '=' + f[i+1:].strip() + plist.append(f.strip()) + s = s[end:] + return plist + + +def _unquotevalue(value): + # This is different than utils.collapse_rfc2231_value() because it doesn't + # try to convert the value to a unicode. Message.get_param() and + # Message.get_params() are both currently defined to return the tuple in + # the face of RFC 2231 parameters. + if isinstance(value, tuple): + return value[0], value[1], utils.unquote(value[2]) + else: + return utils.unquote(value) + + + +class Message: + """Basic message object. + + A message object is defined as something that has a bunch of RFC 2822 + headers and a payload. It may optionally have an envelope header + (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a + multipart or a message/rfc822), then the payload is a list of Message + objects, otherwise it is a string. + + Message objects implement part of the `mapping' interface, which assumes + there is exactly one occurrence of the header per message. Some headers + do in fact appear multiple times (e.g. Received) and for those headers, + you must use the explicit API to set or get all the headers. Not all of + the mapping methods are implemented. + """ + def __init__(self): + self._headers = [] + self._unixfrom = None + self._payload = None + self._charset = None + # Defaults for multipart messages + self.preamble = self.epilogue = None + self.defects = [] + # Default content type + self._default_type = 'text/plain' + + def __str__(self): + """Return the entire formatted message as a string. + This includes the headers, body, and envelope header. + """ + return self.as_string(unixfrom=True) + + def as_string(self, unixfrom=False): + """Return the entire formatted message as a string. + Optional `unixfrom' when True, means include the Unix From_ envelope + header. + + This is a convenience method and may not generate the message exactly + as you intend because by default it mangles lines that begin with + "From ". For more flexibility, use the flatten() method of a + Generator instance. + """ + from email.generator import Generator + fp = StringIO() + g = Generator(fp) + g.flatten(self, unixfrom=unixfrom) + return fp.getvalue() + + def is_multipart(self): + """Return True if the message consists of multiple parts.""" + return isinstance(self._payload, list) + + # + # Unix From_ line + # + def set_unixfrom(self, unixfrom): + self._unixfrom = unixfrom + + def get_unixfrom(self): + return self._unixfrom + + # + # Payload manipulation. + # + def attach(self, payload): + """Add the given payload to the current payload. + + The current payload will always be a list of objects after this method + is called. If you want to set the payload to a scalar object, use + set_payload() instead. + """ + if self._payload is None: + self._payload = [payload] + else: + self._payload.append(payload) + + def get_payload(self, i=None, decode=False): + """Return a reference to the payload. + + The payload will either be a list object or a string. If you mutate + the list object, you modify the message's payload in place. Optional + i returns that index into the payload. + + Optional decode is a flag indicating whether the payload should be + decoded or not, according to the Content-Transfer-Encoding header + (default is False). + + When True and the message is not a multipart, the payload will be + decoded if this header's value is `quoted-printable' or `base64'. If + some other encoding is used, or the header is missing, or if the + payload has bogus data (i.e. bogus base64 or uuencoded data), the + payload is returned as-is. + + If the message is a multipart and the decode flag is True, then None + is returned. + """ + if i is None: + payload = self._payload + elif not isinstance(self._payload, list): + raise TypeError('Expected list, got %s' % type(self._payload)) + else: + payload = self._payload[i] + if decode: + if self.is_multipart(): + return None + cte = self.get('content-transfer-encoding', '').lower() + if cte == 'quoted-printable': + return utils._qdecode(payload) + elif cte == 'base64': + try: + return utils._bdecode(payload) + except binascii.Error: + # Incorrect padding + return payload + elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): + sfp = StringIO() + try: + uu.decode(StringIO(payload+'\n'), sfp, quiet=True) + payload = sfp.getvalue() + except uu.Error: + # Some decoding problem + return payload + # Everything else, including encodings with 8bit or 7bit are returned + # unchanged. + return payload + + def set_payload(self, payload, charset=None): + """Set the payload to the given value. + + Optional charset sets the message's default character set. See + set_charset() for details. + """ + self._payload = payload + if charset is not None: + self.set_charset(charset) + + def set_charset(self, charset): + """Set the charset of the payload to a given character set. + + charset can be a Charset instance, a string naming a character set, or + None. If it is a string it will be converted to a Charset instance. + If charset is None, the charset parameter will be removed from the + Content-Type field. Anything else will generate a TypeError. + + The message will be assumed to be of type text/* encoded with + charset.input_charset. It will be converted to charset.output_charset + and encoded properly, if needed, when generating the plain text + representation of the message. MIME headers (MIME-Version, + Content-Type, Content-Transfer-Encoding) will be added as needed. + + """ + if charset is None: + self.del_param('charset') + self._charset = None + return + if isinstance(charset, basestring): + charset = email.charset.Charset(charset) + if not isinstance(charset, email.charset.Charset): + raise TypeError(charset) + # BAW: should we accept strings that can serve as arguments to the + # Charset constructor? + self._charset = charset + if 'MIME-Version' not in self: + self.add_header('MIME-Version', '1.0') + if 'Content-Type' not in self: + self.add_header('Content-Type', 'text/plain', + charset=charset.get_output_charset()) + else: + self.set_param('charset', charset.get_output_charset()) + if isinstance(self._payload, unicode): + self._payload = self._payload.encode(charset.output_charset) + if str(charset) != charset.get_output_charset(): + self._payload = charset.body_encode(self._payload) + if 'Content-Transfer-Encoding' not in self: + cte = charset.get_body_encoding() + try: + cte(self) + except TypeError: + self._payload = charset.body_encode(self._payload) + self.add_header('Content-Transfer-Encoding', cte) + + def get_charset(self): + """Return the Charset instance associated with the message's payload. + """ + return self._charset + + # + # MAPPING INTERFACE (partial) + # + def __len__(self): + """Return the total number of headers, including duplicates.""" + return len(self._headers) + + def __getitem__(self, name): + """Get a header value. + + Return None if the header is missing instead of raising an exception. + + Note that if the header appeared multiple times, exactly which + occurrence gets returned is undefined. Use get_all() to get all + the values matching a header field name. + """ + return self.get(name) + + def __setitem__(self, name, val): + """Set the value of a header. + + Note: this does not overwrite an existing header with the same field + name. Use __delitem__() first to delete any existing headers. + """ + self._headers.append((name, val)) + + def __delitem__(self, name): + """Delete all occurrences of a header, if present. + + Does not raise an exception if the header is missing. + """ + name = name.lower() + newheaders = [] + for k, v in self._headers: + if k.lower() != name: + newheaders.append((k, v)) + self._headers = newheaders + + def __contains__(self, name): + return name.lower() in [k.lower() for k, v in self._headers] + + def has_key(self, name): + """Return true if the message contains the header.""" + missing = object() + return self.get(name, missing) is not missing + + def keys(self): + """Return a list of all the message's header field names. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. + """ + return [k for k, v in self._headers] + + def values(self): + """Return a list of all the message's header values. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. + """ + return [v for k, v in self._headers] + + def items(self): + """Get all the message's header fields and values. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. + """ + return self._headers[:] + + def get(self, name, failobj=None): + """Get a header value. + + Like __getitem__() but return failobj instead of None when the field + is missing. + """ + name = name.lower() + for k, v in self._headers: + if k.lower() == name: + return v + return failobj + + # + # Additional useful stuff + # + + def get_all(self, name, failobj=None): + """Return a list of all the values for the named field. + + These will be sorted in the order they appeared in the original + message, and may contain duplicates. Any fields deleted and + re-inserted are always appended to the header list. + + If no such fields exist, failobj is returned (defaults to None). + """ + values = [] + name = name.lower() + for k, v in self._headers: + if k.lower() == name: + values.append(v) + if not values: + return failobj + return values + + def add_header(self, _name, _value, **_params): + """Extended header setting. + + name is the header field to add. keyword arguments can be used to set + additional parameters for the header field, with underscores converted + to dashes. Normally the parameter will be added as key="value" unless + value is None, in which case only the key will be added. If a + parameter value contains non-ASCII characters it must be specified as a + three-tuple of (charset, language, value), in which case it will be + encoded according to RFC2231 rules. + + Example: + + msg.add_header('content-disposition', 'attachment', filename='bud.gif') + """ + parts = [] + for k, v in _params.items(): + if v is None: + parts.append(k.replace('_', '-')) + else: + parts.append(_formatparam(k.replace('_', '-'), v)) + if _value is not None: + parts.insert(0, _value) + self._headers.append((_name, SEMISPACE.join(parts))) + + def replace_header(self, _name, _value): + """Replace a header. + + Replace the first matching header found in the message, retaining + header order and case. If no matching header was found, a KeyError is + raised. + """ + _name = _name.lower() + for i, (k, v) in zip(range(len(self._headers)), self._headers): + if k.lower() == _name: + self._headers[i] = (k, _value) + break + else: + raise KeyError(_name) + + # + # Use these three methods instead of the three above. + # + + def get_content_type(self): + """Return the message's content type. + + The returned string is coerced to lower case of the form + `maintype/subtype'. If there was no Content-Type header in the + message, the default type as given by get_default_type() will be + returned. Since according to RFC 2045, messages always have a default + type this will always return a value. + + RFC 2045 defines a message's default type to be text/plain unless it + appears inside a multipart/digest container, in which case it would be + message/rfc822. + """ + missing = object() + value = self.get('content-type', missing) + if value is missing: + # This should have no parameters + return self.get_default_type() + ctype = _splitparam(value)[0].lower() + # RFC 2045, section 5.2 says if its invalid, use text/plain + if ctype.count('/') != 1: + return 'text/plain' + return ctype + + def get_content_maintype(self): + """Return the message's main content type. + + This is the `maintype' part of the string returned by + get_content_type(). + """ + ctype = self.get_content_type() + return ctype.split('/')[0] + + def get_content_subtype(self): + """Returns the message's sub-content type. + + This is the `subtype' part of the string returned by + get_content_type(). + """ + ctype = self.get_content_type() + return ctype.split('/')[1] + + def get_default_type(self): + """Return the `default' content type. + + Most messages have a default content type of text/plain, except for + messages that are subparts of multipart/digest containers. Such + subparts have a default content type of message/rfc822. + """ + return self._default_type + + def set_default_type(self, ctype): + """Set the `default' content type. + + ctype should be either "text/plain" or "message/rfc822", although this + is not enforced. The default content type is not stored in the + Content-Type header. + """ + self._default_type = ctype + + def _get_params_preserve(self, failobj, header): + # Like get_params() but preserves the quoting of values. BAW: + # should this be part of the public interface? + missing = object() + value = self.get(header, missing) + if value is missing: + return failobj + params = [] + for p in _parseparam(';' + value): + try: + name, val = p.split('=', 1) + name = name.strip() + val = val.strip() + except ValueError: + # Must have been a bare attribute + name = p.strip() + val = '' + params.append((name, val)) + params = utils.decode_params(params) + return params + + def get_params(self, failobj=None, header='content-type', unquote=True): + """Return the message's Content-Type parameters, as a list. + + The elements of the returned list are 2-tuples of key/value pairs, as + split on the `=' sign. The left hand side of the `=' is the key, + while the right hand side is the value. If there is no `=' sign in + the parameter the value is the empty string. The value is as + described in the get_param() method. + + Optional failobj is the object to return if there is no Content-Type + header. Optional header is the header to search instead of + Content-Type. If unquote is True, the value is unquoted. + """ + missing = object() + params = self._get_params_preserve(missing, header) + if params is missing: + return failobj + if unquote: + return [(k, _unquotevalue(v)) for k, v in params] + else: + return params + + def get_param(self, param, failobj=None, header='content-type', + unquote=True): + """Return the parameter value if found in the Content-Type header. + + Optional failobj is the object to return if there is no Content-Type + header, or the Content-Type header has no such parameter. Optional + header is the header to search instead of Content-Type. + + Parameter keys are always compared case insensitively. The return + value can either be a string, or a 3-tuple if the parameter was RFC + 2231 encoded. When it's a 3-tuple, the elements of the value are of + the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and + LANGUAGE can be None, in which case you should consider VALUE to be + encoded in the us-ascii charset. You can usually ignore LANGUAGE. + + Your application should be prepared to deal with 3-tuple return + values, and can convert the parameter to a Unicode string like so: + + param = msg.get_param('foo') + if isinstance(param, tuple): + param = unicode(param[2], param[0] or 'us-ascii') + + In any case, the parameter value (either the returned string, or the + VALUE item in the 3-tuple) is always unquoted, unless unquote is set + to False. + """ + if header not in self: + return failobj + for k, v in self._get_params_preserve(failobj, header): + if k.lower() == param.lower(): + if unquote: + return _unquotevalue(v) + else: + return v + return failobj + + def set_param(self, param, value, header='Content-Type', requote=True, + charset=None, language=''): + """Set a parameter in the Content-Type header. + + If the parameter already exists in the header, its value will be + replaced with the new value. + + If header is Content-Type and has not yet been defined for this + message, it will be set to "text/plain" and the new parameter and + value will be appended as per RFC 2045. + + An alternate header can be specified in the header argument, and all + parameters will be quoted as necessary unless requote is False. + + If charset is specified, the parameter will be encoded according to RFC + 2231. Optional language specifies the RFC 2231 language, defaulting + to the empty string. Both charset and language should be strings. + """ + if not isinstance(value, tuple) and charset: + value = (charset, language, value) + + if header not in self and header.lower() == 'content-type': + ctype = 'text/plain' + else: + ctype = self.get(header) + if not self.get_param(param, header=header): + if not ctype: + ctype = _formatparam(param, value, requote) + else: + ctype = SEMISPACE.join( + [ctype, _formatparam(param, value, requote)]) + else: + ctype = '' + for old_param, old_value in self.get_params(header=header, + unquote=requote): + append_param = '' + if old_param.lower() == param.lower(): + append_param = _formatparam(param, value, requote) + else: + append_param = _formatparam(old_param, old_value, requote) + if not ctype: + ctype = append_param + else: + ctype = SEMISPACE.join([ctype, append_param]) + if ctype != self.get(header): + del self[header] + self[header] = ctype + + def del_param(self, param, header='content-type', requote=True): + """Remove the given parameter completely from the Content-Type header. + + The header will be re-written in place without the parameter or its + value. All values will be quoted as necessary unless requote is + False. Optional header specifies an alternative to the Content-Type + header. + """ + if header not in self: + return + new_ctype = '' + for p, v in self.get_params(header=header, unquote=requote): + if p.lower() != param.lower(): + if not new_ctype: + new_ctype = _formatparam(p, v, requote) + else: + new_ctype = SEMISPACE.join([new_ctype, + _formatparam(p, v, requote)]) + if new_ctype != self.get(header): + del self[header] + self[header] = new_ctype + + def set_type(self, type, header='Content-Type', requote=True): + """Set the main type and subtype for the Content-Type header. + + type must be a string in the form "maintype/subtype", otherwise a + ValueError is raised. + + This method replaces the Content-Type header, keeping all the + parameters in place. If requote is False, this leaves the existing + header's quoting as is. Otherwise, the parameters will be quoted (the + default). + + An alternative header can be specified in the header argument. When + the Content-Type header is set, we'll always also add a MIME-Version + header. + """ + # BAW: should we be strict? + if not type.count('/') == 1: + raise ValueError + # Set the Content-Type, you get a MIME-Version + if header.lower() == 'content-type': + del self['mime-version'] + self['MIME-Version'] = '1.0' + if header not in self: + self[header] = type + return + params = self.get_params(header=header, unquote=requote) + del self[header] + self[header] = type + # Skip the first param; it's the old type. + for p, v in params[1:]: + self.set_param(p, v, header, requote) + + def get_filename(self, failobj=None): + """Return the filename associated with the payload if present. + + The filename is extracted from the Content-Disposition header's + `filename' parameter, and it is unquoted. If that header is missing + the `filename' parameter, this method falls back to looking for the + `name' parameter. + """ + missing = object() + filename = self.get_param('filename', missing, 'content-disposition') + if filename is missing: + filename = self.get_param('name', missing, 'content-type') + if filename is missing: + return failobj + return utils.collapse_rfc2231_value(filename).strip() + + def get_boundary(self, failobj=None): + """Return the boundary associated with the payload if present. + + The boundary is extracted from the Content-Type header's `boundary' + parameter, and it is unquoted. + """ + missing = object() + boundary = self.get_param('boundary', missing) + if boundary is missing: + return failobj + # RFC 2046 says that boundaries may begin but not end in w/s + return utils.collapse_rfc2231_value(boundary).rstrip() + + def set_boundary(self, boundary): + """Set the boundary parameter in Content-Type to 'boundary'. + + This is subtly different than deleting the Content-Type header and + adding a new one with a new boundary parameter via add_header(). The + main difference is that using the set_boundary() method preserves the + order of the Content-Type header in the original message. + + HeaderParseError is raised if the message has no Content-Type header. + """ + missing = object() + params = self._get_params_preserve(missing, 'content-type') + if params is missing: + # There was no Content-Type header, and we don't know what type + # to set it to, so raise an exception. + raise errors.HeaderParseError('No Content-Type header found') + newparams = [] + foundp = False + for pk, pv in params: + if pk.lower() == 'boundary': + newparams.append(('boundary', '"%s"' % boundary)) + foundp = True + else: + newparams.append((pk, pv)) + if not foundp: + # The original Content-Type header had no boundary attribute. + # Tack one on the end. BAW: should we raise an exception + # instead??? + newparams.append(('boundary', '"%s"' % boundary)) + # Replace the existing Content-Type header with the new value + newheaders = [] + for h, v in self._headers: + if h.lower() == 'content-type': + parts = [] + for k, v in newparams: + if v == '': + parts.append(k) + else: + parts.append('%s=%s' % (k, v)) + newheaders.append((h, SEMISPACE.join(parts))) + + else: + newheaders.append((h, v)) + self._headers = newheaders + + def get_content_charset(self, failobj=None): + """Return the charset parameter of the Content-Type header. + + The returned string is always coerced to lower case. If there is no + Content-Type header, or if that header has no charset parameter, + failobj is returned. + """ + missing = object() + charset = self.get_param('charset', missing) + if charset is missing: + return failobj + if isinstance(charset, tuple): + # RFC 2231 encoded, so decode it, and it better end up as ascii. + pcharset = charset[0] or 'us-ascii' + try: + # LookupError will be raised if the charset isn't known to + # Python. UnicodeError will be raised if the encoded text + # contains a character not in the charset. + charset = unicode(charset[2], pcharset).encode('us-ascii') + except (LookupError, UnicodeError): + charset = charset[2] + # charset character must be in us-ascii range + try: + if isinstance(charset, str): + charset = unicode(charset, 'us-ascii') + charset = charset.encode('us-ascii') + except UnicodeError: + return failobj + # RFC 2046, $4.1.2 says charsets are not case sensitive + return charset.lower() + + def get_charsets(self, failobj=None): + """Return a list containing the charset(s) used in this message. + + The returned list of items describes the Content-Type headers' + charset parameter for this message and all the subparts in its + payload. + + Each item will either be a string (the value of the charset parameter + in the Content-Type header of that part) or the value of the + 'failobj' parameter (defaults to None), if the part does not have a + main MIME type of "text", or the charset is not defined. + + The list will contain one string for each part of the message, plus + one for the container message (i.e. self), so that a non-multipart + message will still return a list of length 1. + """ + return [part.get_content_charset(failobj) for part in self.walk()] + + # I.e. def walk(self): ... + from email.iterators import walk diff --git a/third_party/stdlib/email/mime/__init__.py b/third_party/stdlib/email/mime/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/third_party/stdlib/email/mime/application.py b/third_party/stdlib/email/mime/application.py new file mode 100644 index 00000000..f5c59055 --- /dev/null +++ b/third_party/stdlib/email/mime/application.py @@ -0,0 +1,36 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Keith Dart +# Contact: email-sig@python.org + +"""Class representing application/* type MIME documents.""" + +__all__ = ["MIMEApplication"] + +from email import encoders +from email.mime.nonmultipart import MIMENonMultipart + + +class MIMEApplication(MIMENonMultipart): + """Class for generating application/* MIME documents.""" + + def __init__(self, _data, _subtype='octet-stream', + _encoder=encoders.encode_base64, **_params): + """Create an application/* type MIME document. + + _data is a string containing the raw application data. + + _subtype is the MIME content type subtype, defaulting to + 'octet-stream'. + + _encoder is a function which will perform the actual encoding for + transport of the application data, defaulting to base64 encoding. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type + header. + """ + if _subtype is None: + raise TypeError('Invalid application MIME subtype') + MIMENonMultipart.__init__(self, 'application', _subtype, **_params) + self.set_payload(_data) + _encoder(self) diff --git a/third_party/stdlib/email/mime/audio.py b/third_party/stdlib/email/mime/audio.py new file mode 100644 index 00000000..c7290c4b --- /dev/null +++ b/third_party/stdlib/email/mime/audio.py @@ -0,0 +1,73 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Anthony Baxter +# Contact: email-sig@python.org + +"""Class representing audio/* type MIME documents.""" + +__all__ = ['MIMEAudio'] + +import sndhdr + +from cStringIO import StringIO +from email import encoders +from email.mime.nonmultipart import MIMENonMultipart + + + +_sndhdr_MIMEmap = {'au' : 'basic', + 'wav' :'x-wav', + 'aiff':'x-aiff', + 'aifc':'x-aiff', + } + +# There are others in sndhdr that don't have MIME types. :( +# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? +def _whatsnd(data): + """Try to identify a sound file type. + + sndhdr.what() has a pretty cruddy interface, unfortunately. This is why + we re-do it here. It would be easier to reverse engineer the Unix 'file' + command and use the standard 'magic' file, as shipped with a modern Unix. + """ + hdr = data[:512] + fakefile = StringIO(hdr) + for testfn in sndhdr.tests: + res = testfn(hdr, fakefile) + if res is not None: + return _sndhdr_MIMEmap.get(res[0]) + return None + + + +class MIMEAudio(MIMENonMultipart): + """Class for generating audio/* MIME documents.""" + + def __init__(self, _audiodata, _subtype=None, + _encoder=encoders.encode_base64, **_params): + """Create an audio/* type MIME document. + + _audiodata is a string containing the raw audio data. If this data + can be decoded by the standard Python `sndhdr' module, then the + subtype will be automatically included in the Content-Type header. + Otherwise, you can specify the specific audio subtype via the + _subtype parameter. If _subtype is not given, and no subtype can be + guessed, a TypeError is raised. + + _encoder is a function which will perform the actual encoding for + transport of the image data. It takes one argument, which is this + Image instance. It should use get_payload() and set_payload() to + change the payload to the encoded form. It should also add any + Content-Transfer-Encoding or other headers to the message as + necessary. The default encoding is Base64. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type + header. + """ + if _subtype is None: + _subtype = _whatsnd(_audiodata) + if _subtype is None: + raise TypeError('Could not find audio MIME subtype') + MIMENonMultipart.__init__(self, 'audio', _subtype, **_params) + self.set_payload(_audiodata) + _encoder(self) diff --git a/third_party/stdlib/email/mime/base.py b/third_party/stdlib/email/mime/base.py new file mode 100644 index 00000000..ac919258 --- /dev/null +++ b/third_party/stdlib/email/mime/base.py @@ -0,0 +1,26 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Base class for MIME specializations.""" + +__all__ = ['MIMEBase'] + +from email import message + + + +class MIMEBase(message.Message): + """Base class for MIME specializations.""" + + def __init__(self, _maintype, _subtype, **_params): + """This constructor adds a Content-Type: and a MIME-Version: header. + + The Content-Type: header is taken from the _maintype and _subtype + arguments. Additional parameters for this header are taken from the + keyword arguments. + """ + message.Message.__init__(self) + ctype = '%s/%s' % (_maintype, _subtype) + self.add_header('Content-Type', ctype, **_params) + self['MIME-Version'] = '1.0' diff --git a/third_party/stdlib/email/mime/image.py b/third_party/stdlib/email/mime/image.py new file mode 100644 index 00000000..55638232 --- /dev/null +++ b/third_party/stdlib/email/mime/image.py @@ -0,0 +1,46 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Class representing image/* type MIME documents.""" + +__all__ = ['MIMEImage'] + +import imghdr + +from email import encoders +from email.mime.nonmultipart import MIMENonMultipart + + + +class MIMEImage(MIMENonMultipart): + """Class for generating image/* type MIME documents.""" + + def __init__(self, _imagedata, _subtype=None, + _encoder=encoders.encode_base64, **_params): + """Create an image/* type MIME document. + + _imagedata is a string containing the raw image data. If this data + can be decoded by the standard Python `imghdr' module, then the + subtype will be automatically included in the Content-Type header. + Otherwise, you can specify the specific image subtype via the _subtype + parameter. + + _encoder is a function which will perform the actual encoding for + transport of the image data. It takes one argument, which is this + Image instance. It should use get_payload() and set_payload() to + change the payload to the encoded form. It should also add any + Content-Transfer-Encoding or other headers to the message as + necessary. The default encoding is Base64. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type + header. + """ + if _subtype is None: + _subtype = imghdr.what(None, _imagedata) + if _subtype is None: + raise TypeError('Could not guess image MIME subtype') + MIMENonMultipart.__init__(self, 'image', _subtype, **_params) + self.set_payload(_imagedata) + _encoder(self) diff --git a/third_party/stdlib/email/mime/message.py b/third_party/stdlib/email/mime/message.py new file mode 100644 index 00000000..275dbfd0 --- /dev/null +++ b/third_party/stdlib/email/mime/message.py @@ -0,0 +1,34 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Class representing message/* MIME documents.""" + +__all__ = ['MIMEMessage'] + +from email import message +from email.mime.nonmultipart import MIMENonMultipart + + + +class MIMEMessage(MIMENonMultipart): + """Class representing message/* MIME documents.""" + + def __init__(self, _msg, _subtype='rfc822'): + """Create a message/* type MIME document. + + _msg is a message object and must be an instance of Message, or a + derived class of Message, otherwise a TypeError is raised. + + Optional _subtype defines the subtype of the contained message. The + default is "rfc822" (this is defined by the MIME standard, even though + the term "rfc822" is technically outdated by RFC 2822). + """ + MIMENonMultipart.__init__(self, 'message', _subtype) + if not isinstance(_msg, message.Message): + raise TypeError('Argument is not an instance of Message') + # It's convenient to use this base class method. We need to do it + # this way or we'll get an exception + message.Message.attach(self, _msg) + # And be sure our default type is set correctly + self.set_default_type('message/rfc822') diff --git a/third_party/stdlib/email/mime/multipart.py b/third_party/stdlib/email/mime/multipart.py new file mode 100644 index 00000000..96618650 --- /dev/null +++ b/third_party/stdlib/email/mime/multipart.py @@ -0,0 +1,47 @@ +# Copyright (C) 2002-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Base class for MIME multipart/* type messages.""" + +__all__ = ['MIMEMultipart'] + +from email.mime.base import MIMEBase + + + +class MIMEMultipart(MIMEBase): + """Base class for MIME multipart/* type messages.""" + + def __init__(self, _subtype='mixed', boundary=None, _subparts=None, + **_params): + """Creates a multipart/* type message. + + By default, creates a multipart/mixed message, with proper + Content-Type and MIME-Version headers. + + _subtype is the subtype of the multipart content type, defaulting to + `mixed'. + + boundary is the multipart boundary string. By default it is + calculated as needed. + + _subparts is a sequence of initial subparts for the payload. It + must be an iterable object, such as a list. You can always + attach new subparts to the message by using the attach() method. + + Additional parameters for the Content-Type header are taken from the + keyword arguments (or passed into the _params argument). + """ + MIMEBase.__init__(self, 'multipart', _subtype, **_params) + + # Initialise _payload to an empty list as the Message superclass's + # implementation of is_multipart assumes that _payload is a list for + # multipart messages. + self._payload = [] + + if _subparts: + for p in _subparts: + self.attach(p) + if boundary: + self.set_boundary(boundary) diff --git a/third_party/stdlib/email/mime/nonmultipart.py b/third_party/stdlib/email/mime/nonmultipart.py new file mode 100644 index 00000000..e1f51968 --- /dev/null +++ b/third_party/stdlib/email/mime/nonmultipart.py @@ -0,0 +1,22 @@ +# Copyright (C) 2002-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Base class for MIME type messages that are not multipart.""" + +__all__ = ['MIMENonMultipart'] + +from email import errors +from email.mime.base import MIMEBase + + + +class MIMENonMultipart(MIMEBase): + """Base class for MIME non-multipart type messages.""" + + def attach(self, payload): + # The public API prohibits attaching multiple subparts to MIMEBase + # derived subtypes since none of them are, by definition, of content + # type multipart/* + raise errors.MultipartConversionError( + 'Cannot attach additional subparts to non-multipart/*') diff --git a/third_party/stdlib/email/mime/text.py b/third_party/stdlib/email/mime/text.py new file mode 100644 index 00000000..5747db5d --- /dev/null +++ b/third_party/stdlib/email/mime/text.py @@ -0,0 +1,30 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Class representing text/* type MIME documents.""" + +__all__ = ['MIMEText'] + +from email.encoders import encode_7or8bit +from email.mime.nonmultipart import MIMENonMultipart + + + +class MIMEText(MIMENonMultipart): + """Class for generating text/* type MIME documents.""" + + def __init__(self, _text, _subtype='plain', _charset='us-ascii'): + """Create a text/* type MIME document. + + _text is the string for this message object. + + _subtype is the MIME sub content type, defaulting to "plain". + + _charset is the character set parameter added to the Content-Type + header. This defaults to "us-ascii". Note that as a side-effect, the + Content-Transfer-Encoding header will also be set. + """ + MIMENonMultipart.__init__(self, 'text', _subtype, + **{'charset': _charset}) + self.set_payload(_text, _charset) diff --git a/third_party/stdlib/email/parser.py b/third_party/stdlib/email/parser.py new file mode 100644 index 00000000..6dad32a3 --- /dev/null +++ b/third_party/stdlib/email/parser.py @@ -0,0 +1,91 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter +# Contact: email-sig@python.org + +"""A parser of RFC 2822 and MIME email messages.""" + +__all__ = ['Parser', 'HeaderParser'] + +import warnings +from cStringIO import StringIO + +from email.feedparser import FeedParser +from email.message import Message + + + +class Parser: + def __init__(self, *args, **kws): + """Parser of RFC 2822 and MIME email messages. + + Creates an in-memory object tree representing the email message, which + can then be manipulated and turned over to a Generator to return the + textual representation of the message. + + The string must be formatted as a block of RFC 2822 headers and header + continuation lines, optionally preceded by a `Unix-from' header. The + header block is terminated either by the end of the string or by a + blank line. + + _class is the class to instantiate for new message objects when they + must be created. This class must have a constructor that can take + zero arguments. Default is Message.Message. + """ + if len(args) >= 1: + if '_class' in kws: + raise TypeError("Multiple values for keyword arg '_class'") + kws['_class'] = args[0] + if len(args) == 2: + if 'strict' in kws: + raise TypeError("Multiple values for keyword arg 'strict'") + kws['strict'] = args[1] + if len(args) > 2: + raise TypeError('Too many arguments') + if '_class' in kws: + self._class = kws['_class'] + del kws['_class'] + else: + self._class = Message + if 'strict' in kws: + warnings.warn("'strict' argument is deprecated (and ignored)", + DeprecationWarning, 2) + del kws['strict'] + if kws: + raise TypeError('Unexpected keyword arguments') + + def parse(self, fp, headersonly=False): + """Create a message structure from the data in a file. + + Reads all the data from the file and returns the root of the message + structure. Optional headersonly is a flag specifying whether to stop + parsing after reading the headers or not. The default is False, + meaning it parses the entire contents of the file. + """ + feedparser = FeedParser(self._class) + if headersonly: + feedparser._set_headersonly() + while True: + data = fp.read(8192) + if not data: + break + feedparser.feed(data) + return feedparser.close() + + def parsestr(self, text, headersonly=False): + """Create a message structure from a string. + + Returns the root of the message structure. Optional headersonly is a + flag specifying whether to stop parsing after reading the headers or + not. The default is False, meaning it parses the entire contents of + the file. + """ + return self.parse(StringIO(text), headersonly=headersonly) + + + +class HeaderParser(Parser): + def parse(self, fp, headersonly=True): + return Parser.parse(self, fp, True) + + def parsestr(self, text, headersonly=True): + return Parser.parsestr(self, text, True) diff --git a/third_party/stdlib/email/quoprimime.py b/third_party/stdlib/email/quoprimime.py new file mode 100644 index 00000000..0c18a9e0 --- /dev/null +++ b/third_party/stdlib/email/quoprimime.py @@ -0,0 +1,336 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Ben Gertzfield +# Contact: email-sig@python.org + +"""Quoted-printable content transfer encoding per RFCs 2045-2047. + +This module handles the content transfer encoding method defined in RFC 2045 +to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to +safely encode text that is in a character set similar to the 7-bit US ASCII +character set, but that includes some 8-bit characters that are normally not +allowed in email bodies or headers. + +Quoted-printable is very space-inefficient for encoding binary files; use the +email.base64mime module for that instead. + +This module provides an interface to encode and decode both headers and bodies +with quoted-printable encoding. + +RFC 2045 defines a method for including character set information in an +`encoded-word' in a header. This method is commonly used for 8-bit real names +in To:/From:/Cc: etc. fields, as well as Subject: lines. + +This module does not do the line wrapping or end-of-line character +conversion necessary for proper internationalized headers; it only +does dumb encoding and decoding. To deal with the various line +wrapping issues, use the email.header module. +""" + +__all__ = [ + 'body_decode', + 'body_encode', + 'body_quopri_check', + 'body_quopri_len', + 'decode', + 'decodestring', + 'encode', + 'encodestring', + 'header_decode', + 'header_encode', + 'header_quopri_check', + 'header_quopri_len', + 'quote', + 'unquote', + ] + +import re + +from string import hexdigits +from email.utils import fix_eols + +CRLF = '\r\n' +NL = '\n' + +# See also Charset.py +MISC_LEN = 7 + +hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]') +bqre = re.compile(r'[^ !-<>-~\t]') + + + +# Helpers +def header_quopri_check(c): + """Return True if the character should be escaped with header quopri.""" + return bool(hqre.match(c)) + + +def body_quopri_check(c): + """Return True if the character should be escaped with body quopri.""" + return bool(bqre.match(c)) + + +def header_quopri_len(s): + """Return the length of str when it is encoded with header quopri.""" + count = 0 + for c in s: + if hqre.match(c): + count += 3 + else: + count += 1 + return count + + +def body_quopri_len(str): + """Return the length of str when it is encoded with body quopri.""" + count = 0 + for c in str: + if bqre.match(c): + count += 3 + else: + count += 1 + return count + + +def _max_append(L, s, maxlen, extra=''): + if not L: + L.append(s.lstrip()) + elif len(L[-1]) + len(s) <= maxlen: + L[-1] += extra + s + else: + L.append(s.lstrip()) + + +def unquote(s): + """Turn a string in the form =AB to the ASCII character with value 0xab""" + return chr(int(s[1:3], 16)) + + +def quote(c): + return "=%02X" % ord(c) + + + +def header_encode(header, charset="iso-8859-1", keep_eols=False, + maxlinelen=76, eol=NL): + """Encode a single header line with quoted-printable (like) encoding. + + Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but + used specifically for email header fields to allow charsets with mostly 7 + bit characters (and some 8 bit) to remain more or less readable in non-RFC + 2045 aware mail clients. + + charset names the character set to use to encode the header. It defaults + to iso-8859-1. + + The resulting string will be in the form: + + "=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?\\n + =?charset?q?Silly_=C8nglish_Kn=EEghts?=" + + with each line wrapped safely at, at most, maxlinelen characters (defaults + to 76 characters). If maxlinelen is None, the entire string is encoded in + one chunk with no splitting. + + End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted + to the canonical email line separator \\r\\n unless the keep_eols + parameter is True (the default is False). + + Each line of the header will be terminated in the value of eol, which + defaults to "\\n". Set this to "\\r\\n" if you are using the result of + this function directly in email. + """ + # Return empty headers unchanged + if not header: + return header + + if not keep_eols: + header = fix_eols(header) + + # Quopri encode each line, in encoded chunks no greater than maxlinelen in + # length, after the RFC chrome is added in. + quoted = [] + if maxlinelen is None: + # An obnoxiously large number that's good enough + max_encoded = 100000 + else: + max_encoded = maxlinelen - len(charset) - MISC_LEN - 1 + + for c in header: + # Space may be represented as _ instead of =20 for readability + if c == ' ': + _max_append(quoted, '_', max_encoded) + # These characters can be included verbatim + elif not hqre.match(c): + _max_append(quoted, c, max_encoded) + # Otherwise, replace with hex value like =E2 + else: + _max_append(quoted, "=%02X" % ord(c), max_encoded) + + # Now add the RFC chrome to each encoded chunk and glue the chunks + # together. BAW: should we be able to specify the leading whitespace in + # the joiner? + joiner = eol + ' ' + return joiner.join(['=?%s?q?%s?=' % (charset, line) for line in quoted]) + + + +def encode(body, binary=False, maxlinelen=76, eol=NL): + """Encode with quoted-printable, wrapping at maxlinelen characters. + + If binary is False (the default), end-of-line characters will be converted + to the canonical email end-of-line sequence \\r\\n. Otherwise they will + be left verbatim. + + Each line of encoded text will end with eol, which defaults to "\\n". Set + this to "\\r\\n" if you will be using the result of this function directly + in an email. + + Each line will be wrapped at, at most, maxlinelen characters (defaults to + 76 characters). Long lines will have the `soft linefeed' quoted-printable + character "=" appended to them, so the decoded text will be identical to + the original text. + """ + if not body: + return body + + if not binary: + body = fix_eols(body) + + # BAW: We're accumulating the body text by string concatenation. That + # can't be very efficient, but I don't have time now to rewrite it. It + # just feels like this algorithm could be more efficient. + encoded_body = '' + lineno = -1 + # Preserve line endings here so we can check later to see an eol needs to + # be added to the output later. + lines = body.splitlines(1) + for line in lines: + # But strip off line-endings for processing this line. + if line.endswith(CRLF): + line = line[:-2] + elif line[-1] in CRLF: + line = line[:-1] + + lineno += 1 + encoded_line = '' + prev = None + linelen = len(line) + # Now we need to examine every character to see if it needs to be + # quopri encoded. BAW: again, string concatenation is inefficient. + for j in range(linelen): + c = line[j] + prev = c + if bqre.match(c): + c = quote(c) + elif j+1 == linelen: + # Check for whitespace at end of line; special case + if c not in ' \t': + encoded_line += c + prev = c + continue + # Check to see to see if the line has reached its maximum length + if len(encoded_line) + len(c) >= maxlinelen: + encoded_body += encoded_line + '=' + eol + encoded_line = '' + encoded_line += c + # Now at end of line.. + if prev and prev in ' \t': + # Special case for whitespace at end of file + if lineno + 1 == len(lines): + prev = quote(prev) + if len(encoded_line) + len(prev) > maxlinelen: + encoded_body += encoded_line + '=' + eol + prev + else: + encoded_body += encoded_line + prev + # Just normal whitespace at end of line + else: + encoded_body += encoded_line + prev + '=' + eol + encoded_line = '' + # Now look at the line we just finished and it has a line ending, we + # need to add eol to the end of the line. + if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF: + encoded_body += encoded_line + eol + else: + encoded_body += encoded_line + encoded_line = '' + return encoded_body + + +# For convenience and backwards compatibility w/ standard base64 module +body_encode = encode +encodestring = encode + + + +# BAW: I'm not sure if the intent was for the signature of this function to be +# the same as base64MIME.decode() or not... +def decode(encoded, eol=NL): + """Decode a quoted-printable string. + + Lines are separated with eol, which defaults to \\n. + """ + if not encoded: + return encoded + # BAW: see comment in encode() above. Again, we're building up the + # decoded string with string concatenation, which could be done much more + # efficiently. + decoded = '' + + for line in encoded.splitlines(): + line = line.rstrip() + if not line: + decoded += eol + continue + + i = 0 + n = len(line) + while i < n: + c = line[i] + if c != '=': + decoded += c + i += 1 + # Otherwise, c == "=". Are we at the end of the line? If so, add + # a soft line break. + elif i+1 == n: + i += 1 + continue + # Decode if in form =AB + elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits: + decoded += unquote(line[i:i+3]) + i += 3 + # Otherwise, not in form =AB, pass literally + else: + decoded += c + i += 1 + + if i == n: + decoded += eol + # Special case if original string did not end with eol + if not encoded.endswith(eol) and decoded.endswith(eol): + decoded = decoded[:-1] + return decoded + + +# For convenience and backwards compatibility w/ standard base64 module +body_decode = decode +decodestring = decode + + + +def _unquote_match(match): + """Turn a match in the form =AB to the ASCII character with value 0xab""" + s = match.group(0) + return unquote(s) + + +# Header decoding is done a bit differently +def header_decode(s): + """Decode a string encoded with RFC 2045 MIME header `Q' encoding. + + This function does not parse a full MIME header value encoded with + quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use + the high level email.header class for that functionality. + """ + s = s.replace('_', ' ') + return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s) diff --git a/third_party/stdlib/email/test/__init__.py b/third_party/stdlib/email/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/third_party/stdlib/email/test/data/PyBanner048.gif b/third_party/stdlib/email/test/data/PyBanner048.gif new file mode 100644 index 00000000..1a5c87f6 Binary files /dev/null and b/third_party/stdlib/email/test/data/PyBanner048.gif differ diff --git a/third_party/stdlib/email/test/data/audiotest.au b/third_party/stdlib/email/test/data/audiotest.au new file mode 100644 index 00000000..f76b0501 Binary files /dev/null and b/third_party/stdlib/email/test/data/audiotest.au differ diff --git a/third_party/stdlib/email/test/data/msg_01.txt b/third_party/stdlib/email/test/data/msg_01.txt new file mode 100644 index 00000000..7e33bcf9 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_01.txt @@ -0,0 +1,19 @@ +Return-Path: +Delivered-To: bbb@zzz.org +Received: by mail.zzz.org (Postfix, from userid 889) + id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +Message-ID: <15090.61304.110929.45684@aaa.zzz.org> +From: bbb@ddd.com (John X. Doe) +To: bbb@zzz.org +Subject: This is a test message +Date: Fri, 4 May 2001 14:05:44 -0400 + + +Hi, + +Do you like this message? + +-Me diff --git a/third_party/stdlib/email/test/data/msg_02.txt b/third_party/stdlib/email/test/data/msg_02.txt new file mode 100644 index 00000000..5d0a7e16 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_02.txt @@ -0,0 +1,136 @@ +MIME-version: 1.0 +From: ppp-request@zzz.org +Sender: ppp-admin@zzz.org +To: ppp@zzz.org +Subject: Ppp digest, Vol 1 #2 - 5 msgs +Date: Fri, 20 Apr 2001 20:18:00 -0400 (EDT) +X-Mailer: Mailman v2.0.4 +X-Mailman-Version: 2.0.4 +Content-Type: multipart/mixed; boundary="192.168.1.2.889.32614.987812255.500.21814" + +--192.168.1.2.889.32614.987812255.500.21814 +Content-type: text/plain; charset=us-ascii +Content-description: Masthead (Ppp digest, Vol 1 #2) + +Send Ppp mailing list submissions to + ppp@zzz.org + +To subscribe or unsubscribe via the World Wide Web, visit + http://www.zzz.org/mailman/listinfo/ppp +or, via email, send a message with subject or body 'help' to + ppp-request@zzz.org + +You can reach the person managing the list at + ppp-admin@zzz.org + +When replying, please edit your Subject line so it is more specific +than "Re: Contents of Ppp digest..." + + +--192.168.1.2.889.32614.987812255.500.21814 +Content-type: text/plain; charset=us-ascii +Content-description: Today's Topics (5 msgs) + +Today's Topics: + + 1. testing #1 (Barry A. Warsaw) + 2. testing #2 (Barry A. Warsaw) + 3. testing #3 (Barry A. Warsaw) + 4. testing #4 (Barry A. Warsaw) + 5. testing #5 (Barry A. Warsaw) + +--192.168.1.2.889.32614.987812255.500.21814 +Content-Type: multipart/digest; boundary="__--__--" + +--__--__-- + +Message: 1 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +Date: Fri, 20 Apr 2001 20:16:13 -0400 +To: ppp@zzz.org +From: barry@digicool.com (Barry A. Warsaw) +Subject: [Ppp] testing #1 +Precedence: bulk + + +hello + + +--__--__-- + +Message: 2 +Date: Fri, 20 Apr 2001 20:16:21 -0400 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +To: ppp@zzz.org +From: barry@digicool.com (Barry A. Warsaw) +Precedence: bulk + + +hello + + +--__--__-- + +Message: 3 +Date: Fri, 20 Apr 2001 20:16:25 -0400 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +To: ppp@zzz.org +From: barry@digicool.com (Barry A. Warsaw) +Subject: [Ppp] testing #3 +Precedence: bulk + + +hello + + +--__--__-- + +Message: 4 +Date: Fri, 20 Apr 2001 20:16:28 -0400 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +To: ppp@zzz.org +From: barry@digicool.com (Barry A. Warsaw) +Subject: [Ppp] testing #4 +Precedence: bulk + + +hello + + +--__--__-- + +Message: 5 +Date: Fri, 20 Apr 2001 20:16:32 -0400 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +To: ppp@zzz.org +From: barry@digicool.com (Barry A. Warsaw) +Subject: [Ppp] testing #5 +Precedence: bulk + + +hello + + + + +--__--__---- + +--192.168.1.2.889.32614.987812255.500.21814 +Content-type: text/plain; charset=us-ascii +Content-description: Digest Footer + +_______________________________________________ +Ppp mailing list +Ppp@zzz.org +http://www.zzz.org/mailman/listinfo/ppp + + +--192.168.1.2.889.32614.987812255.500.21814-- + +End of Ppp Digest + diff --git a/third_party/stdlib/email/test/data/msg_03.txt b/third_party/stdlib/email/test/data/msg_03.txt new file mode 100644 index 00000000..c748ebf1 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_03.txt @@ -0,0 +1,16 @@ +Return-Path: +Delivered-To: bbb@zzz.org +Received: by mail.zzz.org (Postfix, from userid 889) + id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) +Message-ID: <15090.61304.110929.45684@aaa.zzz.org> +From: bbb@ddd.com (John X. Doe) +To: bbb@zzz.org +Subject: This is a test message +Date: Fri, 4 May 2001 14:05:44 -0400 + + +Hi, + +Do you like this message? + +-Me diff --git a/third_party/stdlib/email/test/data/msg_04.txt b/third_party/stdlib/email/test/data/msg_04.txt new file mode 100644 index 00000000..1f633c44 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_04.txt @@ -0,0 +1,37 @@ +Return-Path: +Delivered-To: barry@python.org +Received: by mail.python.org (Postfix, from userid 889) + id C2BF0D37C6; Tue, 11 Sep 2001 00:05:05 -0400 (EDT) +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="h90VIIIKmx" +Content-Transfer-Encoding: 7bit +Message-ID: <15261.36209.358846.118674@anthem.python.org> +From: barry@python.org (Barry A. Warsaw) +To: barry@python.org +Subject: a simple multipart +Date: Tue, 11 Sep 2001 00:05:05 -0400 +X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid +X-Attribution: BAW +X-Oblique-Strategy: Make a door into a window + + +--h90VIIIKmx +Content-Type: text/plain +Content-Disposition: inline; + filename="msg.txt" +Content-Transfer-Encoding: 7bit + +a simple kind of mirror +to reflect upon our own + +--h90VIIIKmx +Content-Type: text/plain +Content-Disposition: inline; + filename="msg.txt" +Content-Transfer-Encoding: 7bit + +a simple kind of mirror +to reflect upon our own + +--h90VIIIKmx-- + diff --git a/third_party/stdlib/email/test/data/msg_05.txt b/third_party/stdlib/email/test/data/msg_05.txt new file mode 100644 index 00000000..87d5e9cb --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_05.txt @@ -0,0 +1,28 @@ +From: foo +Subject: bar +To: baz +MIME-Version: 1.0 +Content-Type: multipart/report; report-type=delivery-status; + boundary="D1690A7AC1.996856090/mail.example.com" +Message-Id: <20010803162810.0CA8AA7ACC@mail.example.com> + +This is a MIME-encapsulated message. + +--D1690A7AC1.996856090/mail.example.com +Content-Type: text/plain + +Yadda yadda yadda + +--D1690A7AC1.996856090/mail.example.com + +Yadda yadda yadda + +--D1690A7AC1.996856090/mail.example.com +Content-Type: message/rfc822 + +From: nobody@python.org + +Yadda yadda yadda + +--D1690A7AC1.996856090/mail.example.com-- + diff --git a/third_party/stdlib/email/test/data/msg_06.txt b/third_party/stdlib/email/test/data/msg_06.txt new file mode 100644 index 00000000..69f3a47f --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_06.txt @@ -0,0 +1,33 @@ +Return-Path: +Delivered-To: barry@python.org +MIME-Version: 1.0 +Content-Type: message/rfc822 +Content-Description: forwarded message +Content-Transfer-Encoding: 7bit +Message-ID: <15265.9482.641338.555352@python.org> +From: barry@zope.com (Barry A. Warsaw) +Sender: barry@python.org +To: barry@python.org +Subject: forwarded message from Barry A. Warsaw +Date: Thu, 13 Sep 2001 17:28:42 -0400 +X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid +X-Attribution: BAW +X-Oblique-Strategy: Be dirty +X-Url: http://barry.wooz.org + +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Return-Path: +Delivered-To: barry@python.org +Message-ID: <15265.9468.713530.98441@python.org> +From: barry@zope.com (Barry A. Warsaw) +Sender: barry@python.org +To: barry@python.org +Subject: testing +Date: Thu, 13 Sep 2001 17:28:28 -0400 +X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid +X-Attribution: BAW +X-Oblique-Strategy: Spectrum analysis +X-Url: http://barry.wooz.org + + diff --git a/third_party/stdlib/email/test/data/msg_07.txt b/third_party/stdlib/email/test/data/msg_07.txt new file mode 100644 index 00000000..721f3a0d --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_07.txt @@ -0,0 +1,83 @@ +MIME-Version: 1.0 +From: Barry +To: Dingus Lovers +Subject: Here is your dingus fish +Date: Fri, 20 Apr 2001 19:35:02 -0400 +Content-Type: multipart/mixed; boundary="BOUNDARY" + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" + +Hi there, + +This is the dingus fish. + +--BOUNDARY +Content-Type: image/gif; name="dingusfish.gif" +Content-Transfer-Encoding: base64 +content-disposition: attachment; filename="dingusfish.gif" + +R0lGODdhAAEAAfAAAP///wAAACwAAAAAAAEAAQAC/oSPqcvtD6OctNqLs968+w+G4kiW5omm6sq2 +7gvH8kzX9o3n+s73/g8MCofEovGITGICTKbyCV0FDNOo9SqpQqpOrJfXzTQj2vD3TGtqL+NtGQ2f +qTXmxzuOd7WXdcc9DyjU53ewFni4s0fGhdiYaEhGBelICTNoV1j5NUnFcrmUqemjNifJVWpaOqaI +oFq3SspZsSraE7sHq3jr1MZqWvi662vxV4tD+pvKW6aLDOCLyur8PDwbanyDeq0N3DctbQYeLDvR +RY6t95m6UB0d3mwIrV7e2VGNvjjffukeJp4w7F65KecGFsTHQGAygOrgrWs1jt28Rc88KESYcGLA +/obvTkH6p+CinWJiJmIMqXGQwH/y4qk0SYjgQTczT3ajKZGfuI0uJ4kkVI/DT5s3/ejkxI0aT4Y+ +YTYgWbImUaXk9nlLmnSh1qJiJFl0OpUqRK4oOy7NyRQtHWofhoYVxkwWXKUSn0YsS+fUV6lhqfYb +6ayd3Z5qQdG1B7bvQzaJjwUV2lixMUZ7JVsOlfjWVr/3NB/uFvnySBN6Dcb6rGwaRM3wsormw5cC +M9NxWy/bWdufudCvy8bOAjXjVVwta/uO21sE5RHBCzNFXtgq9ORtH4eYjVP4Yryo026nvkFmCeyA +B29efV6ravCMK5JwWd5897Qrx7ll38o6iHDZ/rXPR//feevhF4l7wjUGX3xq1eeRfM4RSJGBIV1D +z1gKPkfWag3mVBVvva1RlX5bAJTPR/2YqNtw/FkIYYEi/pIZiAdpcxpoHtmnYYoZtvhUftzdx5ZX +JSKDW405zkGcZzzGZ6KEv4FI224oDmijlEf+xp6MJK5ojY/ASeVUR+wsKRuJ+XFZ5o7ZeEime8t1 +ouUsU6YjF5ZtUihhkGfCdFQLWQFJ3UXxmElfhQnR+eCdcDbkFZp6vTRmj56ApCihn5QGpaToNZmR +n3NVSpZcQpZ2KEONusaiCsKAug0wkQbJSFO+PTSjneGxOuFjPlUk3ovWvdIerjUg9ZGIOtGq/qeX +eCYrrCX+1UPsgTKGGRSbzd5q156d/gpfbJxe66eD5iQKrXj7RGgruGxs62qebBHUKS32CKluCiqZ +qh+pmehmEb71noAUoe5e9Zm17S7773V10pjrtG4CmuurCV/n6zLK5turWNhqOvFXbjhZrMD0YhKe +wR0zOyuvsh6MWrGoIuzvyWu5y1WIFAqmJselypxXh6dKLNOKEB98L88bS2rkNqqlKzCNJp9c0G0j +Gzh0iRrCbHSXmPR643QS+4rWhgFmnSbSuXCjS0xAOWkU2UdLqyuUNfHSFdUouy3bm5i5GnDM3tG8 +doJ4r5tqu3pPbRSVfvs8uJzeNXhp3n4j/tZ42SwH7eaWUUOjc3qFV9453UHTXZfcLH+OeNs5g36x +lBnHvTm7EbMbLeuaLncao8vWCXimfo1o+843Ak6y4ChNeGntvAYvfLK4ezmoyNIbNCLTCXO9ZV3A +E8/s88RczPzDwI4Ob7XZyl7+9Miban29h+tJZPrE21wgvBphDfrrfPdCTPKJD/y98L1rZwHcV6Jq +Zab0metpuNIX/qAFPoz171WUaUb4HAhBSzHuHfjzHb3kha/2Cctis/ORArVHNYfFyYRH2pYIRzic +isVOfPWD1b6mRTqpCRBozzof6UZVvFXRxWIr3GGrEviGYgyPMfahheiSaLs/9QeFu7oZ/ndSY8DD +ya9x+uPed+7mxN2IzIISBOMLFYWVqC3Pew1T2nFuuCiwZS5/v6II10i4t1OJcUH2U9zxKodHsGGv +Oa+zkvNUYUOa/TCCRutF9MzDwdlUMJADTCGSbDQ5OV4PTamDoPEi6Ecc/RF5RWwkcdSXvSOaDWSn +I9LlvubFTQpuc6JKXLcKeb+xdbKRBnwREemXyjg6ME65aJiOuBgrktzykfPLJBKR9ClMavJ62/Ff +BlNIyod9yX9wcSXexnXFpvkrbXk64xsx5Db7wXKP5fSgsvwIMM/9631VLBfkmtbHRXpqmtei52hG +pUwSlo+BASQoeILDOBgREECxBBh5/iYmNsQ9dIv5+OI++QkqdsJPc3uykz5fkM+OraeekcQF7X4n +B5S67za5U967PmooGQhUXfF7afXyCD7ONdRe17QogYjVx38uLwtrS6nhTnm15LQUnu9E2uK6CNI/ +1HOABj0ESwOjut4FEpFQpdNAm4K2LHnDWHNcmKB2ioKBogysVZtMO2nSxUdZ8Yk2kJc7URioLVI0 +YgmtIwZj4LoeKemgnOnbUdGnzZ4Oa6scqiolBGqS6RgWNLu0RMhcaE6rhhU4hiuqFXPAG8fGwTPW +FKeLMtdVmXLSs5YJGF/YeVm7rREMlY3UYE+yCxbaMXX8y15m5zVHq6GOKDMynzII/jdUHdyVqIy0 +ifX2+r/EgtZcvRzSb72gU9ui87M2VecjKildW/aFqaYhKoryUjfB/g4qtyVuc60xFDGmCxwjW+qu +zjuwl2GkOWn66+3QiiEctvd04OVvcCVzjgT7lrkvjVGKKHmmlDUKowSeikb5kK/mJReuWOxONx+s +ULsl+Lqb0CVn0SrVyJ6wt4t6yTeSCafhPhAf0OXn6L60UMxiLolFAtmN35S2Ob1lZpQ1r/n0Qb5D +oQ1zJiRVDgF8N3Q8TYfbi3DyWCy3lT1nxyBs6FT3S2GOzWRlxwKvlRP0RPJA9SjxEy0UoEnkA+M4 +cnzLMJrBGWLFEaaUb5lvpqbq/loOaU5+DFuHPxo82/OZuM8FXG3oVNZhtWpMpb/0Xu5m/LfLhHZQ +7yuVI0MqZ7NE43imC8jH3IwGZlbPm0xkJYs7+2U48hXTsFSMqgGDvai0kLxyynKNT/waj+q1c1tz +GjOpPBgdCSq3UKZxCSsqFIY+O6JbAWGWcV1pwqLyj5sGqCF1xb1F3varUWqrJv6cN3PrUXzijtfZ +FshpBL3Xwr4GIPvU2N8EjrJgS1zl21rbXQMXeXc5jjFyrhpCzijSv/RQtyPSzHCFMhlME95fHglt +pRsX+dfSQjUeHAlpWzJ5iOo79Ldnaxai6bXTcGO3fp07ri7HLEmXXPlYi8bv/qVxvNcdra6m7Rlb +6JBTb5fd66VhFRjGArh2n7R1rDW4P5NOT9K0I183T2scYkeZ3q/VFyLb09U9ajzXBS8Kgkhc4mBS +kYY9cy3Vy9lUnuNJH8HGIclUilwnBtjUOH0gteGOZ4c/XNrhXLSYDyxfnD8z1pDy7rYRvDolhnbe +UMzxCZUs40s6s7UIvBnLgc0+vKuOkIXeOrDymlp+Zxra4MZLBbVrqD/jTJ597pDmnw5c4+DbyB88 +9Cg9DodYcSuMZT/114pptqc/EuTjRPvH/z5slzI3tluOEBBLqOXLOX+0I5929tO97wkvl/atCz+y +xJrdwteW2FNW/NSmBP+f/maYtVs/bYyBC7Ox3jsYZHL05CIrBa/nS+b3bHfiYm4Ueil1YZZSgAUI +fFZ1dxUmeA2oQRQ3RuGXNGLFV9/XbGFGPV6kfzk1TBBCd+izc7q1H+OHMJwmaBX2IQNYVAKHYepV +SSGCe6CnbYHHETKGNe43EDvFgZr0gB/nVHPHZ80VV1ojOiI3XDvYIkl4ayo4bxQIgrFXWTvBI0nH +VElWMuw2aLUWCRHHf8ymVCHjFlJnOSojfevCYyyyZDH0IcvHhrsnQ5O1OsWzONuVVKIxSxiFZ/tR +fKDAf6xFTnw4O9Qig2VCfW2hJQrmMOuHW0W3dLQmCMO2ccdUd/xyfflH/olTiHZVdGwb8nIwRzSE +J15jFlOJuBZBZ4CiyHyd2IFylFlB+HgHhYabhWOGwYO1ZH/Og1dtQlFMk352CGRSIFTapnWQEUtN +l4zv8S0aaCFDyGCBqDUxZYpxGHX01y/JuH1xhn7TOCnNCI4eKDs5WGX4R425F4vF1o3BJ4vO0otq +I3rimI7jJY1jISqnBxknCIvruF83mF5wN4X7qGLIhR8A2Vg0yFERSIXn9Vv3GHy3Vj/WIkKddlYi +yIMv2I/VMjTLpW7pt05SWIZR0RPyxpB4SIUM9lBPGBl0GC7oSEEwRYLe4pJpZY2P0zbI1n+Oc44w +qY3PUnmF0ixjVpDD/mJ9wpOBGTVgXlaCaZiPcIWK5NiKBIiPdGaQ0TWGvAiG7nMchdZb7Vgf8zNi +MuMyzRdy/lePe9iC4TRx7WhhOQI/QiSVNAmAa2lT/piFbuh7ofJoYSZzrSZ1bvmWw3eN2nKUPVky +uPN5/VRfohRd0VYZoqhKIlU6TXYhJxmPUIloAwc1bPmHEpaZYZORHNlXUJM07hATwHR8MJYqkwWR +WaIezFhxSFlc8/Fq82hEnpeRozg3ULhhr9lAGtVEkCg5ZNRuuVleBPaZadhG0ZgkyPmDOTOKzViM +YgOcpukKqQcbjAWS0IleQ2ROjdh6A+md1qWdBRSX7iSYgFRTtRmBpJioieXJiHfJiMGIR9fJOn8I +MSfXYhspn4ooSa2mSAj4n+8Bmg03fBJZoPOJgsVZRxu1oOMRPXYYjdqjihFaEoZpXBREanuJoRI6 +cibFinq4ngUKh/wQd/H5ofYCZ0HJXR62opZFaAT0iFIZo4DIiUojkjeqKiuoZirKo5Y1a7AWckGa +BkuYoD5lpDK6eUs6CkDqpETwl1EqpfhJpVeKpVl6EgUAADs= + +--BOUNDARY-- diff --git a/third_party/stdlib/email/test/data/msg_08.txt b/third_party/stdlib/email/test/data/msg_08.txt new file mode 100644 index 00000000..b5630836 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_08.txt @@ -0,0 +1,24 @@ +MIME-Version: 1.0 +From: Barry Warsaw +To: Dingus Lovers +Subject: Lyrics +Date: Fri, 20 Apr 2001 19:35:02 -0400 +Content-Type: multipart/mixed; boundary="BOUNDARY" + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" + + +--BOUNDARY +Content-Type: text/html; charset="iso-8859-1" + + +--BOUNDARY +Content-Type: text/plain; charset="iso-8859-2" + + +--BOUNDARY +Content-Type: text/plain; charset="koi8-r" + + +--BOUNDARY-- diff --git a/third_party/stdlib/email/test/data/msg_09.txt b/third_party/stdlib/email/test/data/msg_09.txt new file mode 100644 index 00000000..575c4c20 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_09.txt @@ -0,0 +1,24 @@ +MIME-Version: 1.0 +From: Barry Warsaw +To: Dingus Lovers +Subject: Lyrics +Date: Fri, 20 Apr 2001 19:35:02 -0400 +Content-Type: multipart/mixed; boundary="BOUNDARY" + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" + + +--BOUNDARY +Content-Type: text/html; charset="iso-8859-1" + + +--BOUNDARY +Content-Type: text/plain + + +--BOUNDARY +Content-Type: text/plain; charset="koi8-r" + + +--BOUNDARY-- diff --git a/third_party/stdlib/email/test/data/msg_10.txt b/third_party/stdlib/email/test/data/msg_10.txt new file mode 100644 index 00000000..07903960 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_10.txt @@ -0,0 +1,39 @@ +MIME-Version: 1.0 +From: Barry Warsaw +To: Dingus Lovers +Subject: Lyrics +Date: Fri, 20 Apr 2001 19:35:02 -0400 +Content-Type: multipart/mixed; boundary="BOUNDARY" + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit + +This is a 7bit encoded message. + +--BOUNDARY +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: Quoted-Printable + +=A1This is a Quoted Printable encoded message! + +--BOUNDARY +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: Base64 + +VGhpcyBpcyBhIEJhc2U2NCBlbmNvZGVkIG1lc3NhZ2Uu + + +--BOUNDARY +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: Base64 + +VGhpcyBpcyBhIEJhc2U2NCBlbmNvZGVkIG1lc3NhZ2UuCg== + + +--BOUNDARY +Content-Type: text/plain; charset="iso-8859-1" + +This has no Content-Transfer-Encoding: header. + +--BOUNDARY-- diff --git a/third_party/stdlib/email/test/data/msg_11.txt b/third_party/stdlib/email/test/data/msg_11.txt new file mode 100644 index 00000000..8f7f1991 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_11.txt @@ -0,0 +1,7 @@ +Content-Type: message/rfc822 +MIME-Version: 1.0 +Subject: The enclosing message + +Subject: An enclosed message + +Here is the body of the message. diff --git a/third_party/stdlib/email/test/data/msg_12.txt b/third_party/stdlib/email/test/data/msg_12.txt new file mode 100644 index 00000000..4bec8d94 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_12.txt @@ -0,0 +1,36 @@ +MIME-Version: 1.0 +From: Barry Warsaw +To: Dingus Lovers +Subject: Lyrics +Date: Fri, 20 Apr 2001 19:35:02 -0400 +Content-Type: multipart/mixed; boundary="BOUNDARY" + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" + + +--BOUNDARY +Content-Type: text/html; charset="iso-8859-1" + + +--BOUNDARY +Content-Type: multipart/mixed; boundary="ANOTHER" + +--ANOTHER +Content-Type: text/plain; charset="iso-8859-2" + + +--ANOTHER +Content-Type: text/plain; charset="iso-8859-3" + +--ANOTHER-- + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" + + +--BOUNDARY +Content-Type: text/plain; charset="koi8-r" + + +--BOUNDARY-- diff --git a/third_party/stdlib/email/test/data/msg_12a.txt b/third_party/stdlib/email/test/data/msg_12a.txt new file mode 100644 index 00000000..e94224ec --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_12a.txt @@ -0,0 +1,38 @@ +MIME-Version: 1.0 +From: Barry Warsaw +To: Dingus Lovers +Subject: Lyrics +Date: Fri, 20 Apr 2001 19:35:02 -0400 +Content-Type: multipart/mixed; boundary="BOUNDARY" + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" + + +--BOUNDARY +Content-Type: text/html; charset="iso-8859-1" + + +--BOUNDARY +Content-Type: multipart/mixed; boundary="ANOTHER" + +--ANOTHER +Content-Type: text/plain; charset="iso-8859-2" + + +--ANOTHER +Content-Type: text/plain; charset="iso-8859-3" + + +--ANOTHER-- + + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" + + +--BOUNDARY +Content-Type: text/plain; charset="koi8-r" + + +--BOUNDARY-- diff --git a/third_party/stdlib/email/test/data/msg_13.txt b/third_party/stdlib/email/test/data/msg_13.txt new file mode 100644 index 00000000..8e6d52d5 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_13.txt @@ -0,0 +1,94 @@ +MIME-Version: 1.0 +From: Barry +To: Dingus Lovers +Subject: Here is your dingus fish +Date: Fri, 20 Apr 2001 19:35:02 -0400 +Content-Type: multipart/mixed; boundary="OUTER" + +--OUTER +Content-Type: text/plain; charset="us-ascii" + +A text/plain part + +--OUTER +Content-Type: multipart/mixed; boundary=BOUNDARY + + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" + +Hi there, + +This is the dingus fish. + +--BOUNDARY +Content-Type: image/gif; name="dingusfish.gif" +Content-Transfer-Encoding: base64 +content-disposition: attachment; filename="dingusfish.gif" + +R0lGODdhAAEAAfAAAP///wAAACwAAAAAAAEAAQAC/oSPqcvtD6OctNqLs968+w+G4kiW5omm6sq2 +7gvH8kzX9o3n+s73/g8MCofEovGITGICTKbyCV0FDNOo9SqpQqpOrJfXzTQj2vD3TGtqL+NtGQ2f +qTXmxzuOd7WXdcc9DyjU53ewFni4s0fGhdiYaEhGBelICTNoV1j5NUnFcrmUqemjNifJVWpaOqaI +oFq3SspZsSraE7sHq3jr1MZqWvi662vxV4tD+pvKW6aLDOCLyur8PDwbanyDeq0N3DctbQYeLDvR +RY6t95m6UB0d3mwIrV7e2VGNvjjffukeJp4w7F65KecGFsTHQGAygOrgrWs1jt28Rc88KESYcGLA +/obvTkH6p+CinWJiJmIMqXGQwH/y4qk0SYjgQTczT3ajKZGfuI0uJ4kkVI/DT5s3/ejkxI0aT4Y+ +YTYgWbImUaXk9nlLmnSh1qJiJFl0OpUqRK4oOy7NyRQtHWofhoYVxkwWXKUSn0YsS+fUV6lhqfYb +6ayd3Z5qQdG1B7bvQzaJjwUV2lixMUZ7JVsOlfjWVr/3NB/uFvnySBN6Dcb6rGwaRM3wsormw5cC +M9NxWy/bWdufudCvy8bOAjXjVVwta/uO21sE5RHBCzNFXtgq9ORtH4eYjVP4Yryo026nvkFmCeyA +B29efV6ravCMK5JwWd5897Qrx7ll38o6iHDZ/rXPR//feevhF4l7wjUGX3xq1eeRfM4RSJGBIV1D +z1gKPkfWag3mVBVvva1RlX5bAJTPR/2YqNtw/FkIYYEi/pIZiAdpcxpoHtmnYYoZtvhUftzdx5ZX +JSKDW405zkGcZzzGZ6KEv4FI224oDmijlEf+xp6MJK5ojY/ASeVUR+wsKRuJ+XFZ5o7ZeEime8t1 +ouUsU6YjF5ZtUihhkGfCdFQLWQFJ3UXxmElfhQnR+eCdcDbkFZp6vTRmj56ApCihn5QGpaToNZmR +n3NVSpZcQpZ2KEONusaiCsKAug0wkQbJSFO+PTSjneGxOuFjPlUk3ovWvdIerjUg9ZGIOtGq/qeX +eCYrrCX+1UPsgTKGGRSbzd5q156d/gpfbJxe66eD5iQKrXj7RGgruGxs62qebBHUKS32CKluCiqZ +qh+pmehmEb71noAUoe5e9Zm17S7773V10pjrtG4CmuurCV/n6zLK5turWNhqOvFXbjhZrMD0YhKe +wR0zOyuvsh6MWrGoIuzvyWu5y1WIFAqmJselypxXh6dKLNOKEB98L88bS2rkNqqlKzCNJp9c0G0j +Gzh0iRrCbHSXmPR643QS+4rWhgFmnSbSuXCjS0xAOWkU2UdLqyuUNfHSFdUouy3bm5i5GnDM3tG8 +doJ4r5tqu3pPbRSVfvs8uJzeNXhp3n4j/tZ42SwH7eaWUUOjc3qFV9453UHTXZfcLH+OeNs5g36x +lBnHvTm7EbMbLeuaLncao8vWCXimfo1o+843Ak6y4ChNeGntvAYvfLK4ezmoyNIbNCLTCXO9ZV3A +E8/s88RczPzDwI4Ob7XZyl7+9Miban29h+tJZPrE21wgvBphDfrrfPdCTPKJD/y98L1rZwHcV6Jq +Zab0metpuNIX/qAFPoz171WUaUb4HAhBSzHuHfjzHb3kha/2Cctis/ORArVHNYfFyYRH2pYIRzic +isVOfPWD1b6mRTqpCRBozzof6UZVvFXRxWIr3GGrEviGYgyPMfahheiSaLs/9QeFu7oZ/ndSY8DD +ya9x+uPed+7mxN2IzIISBOMLFYWVqC3Pew1T2nFuuCiwZS5/v6II10i4t1OJcUH2U9zxKodHsGGv +Oa+zkvNUYUOa/TCCRutF9MzDwdlUMJADTCGSbDQ5OV4PTamDoPEi6Ecc/RF5RWwkcdSXvSOaDWSn +I9LlvubFTQpuc6JKXLcKeb+xdbKRBnwREemXyjg6ME65aJiOuBgrktzykfPLJBKR9ClMavJ62/Ff +BlNIyod9yX9wcSXexnXFpvkrbXk64xsx5Db7wXKP5fSgsvwIMM/9631VLBfkmtbHRXpqmtei52hG +pUwSlo+BASQoeILDOBgREECxBBh5/iYmNsQ9dIv5+OI++QkqdsJPc3uykz5fkM+OraeekcQF7X4n +B5S67za5U967PmooGQhUXfF7afXyCD7ONdRe17QogYjVx38uLwtrS6nhTnm15LQUnu9E2uK6CNI/ +1HOABj0ESwOjut4FEpFQpdNAm4K2LHnDWHNcmKB2ioKBogysVZtMO2nSxUdZ8Yk2kJc7URioLVI0 +YgmtIwZj4LoeKemgnOnbUdGnzZ4Oa6scqiolBGqS6RgWNLu0RMhcaE6rhhU4hiuqFXPAG8fGwTPW +FKeLMtdVmXLSs5YJGF/YeVm7rREMlY3UYE+yCxbaMXX8y15m5zVHq6GOKDMynzII/jdUHdyVqIy0 +ifX2+r/EgtZcvRzSb72gU9ui87M2VecjKildW/aFqaYhKoryUjfB/g4qtyVuc60xFDGmCxwjW+qu +zjuwl2GkOWn66+3QiiEctvd04OVvcCVzjgT7lrkvjVGKKHmmlDUKowSeikb5kK/mJReuWOxONx+s +ULsl+Lqb0CVn0SrVyJ6wt4t6yTeSCafhPhAf0OXn6L60UMxiLolFAtmN35S2Ob1lZpQ1r/n0Qb5D +oQ1zJiRVDgF8N3Q8TYfbi3DyWCy3lT1nxyBs6FT3S2GOzWRlxwKvlRP0RPJA9SjxEy0UoEnkA+M4 +cnzLMJrBGWLFEaaUb5lvpqbq/loOaU5+DFuHPxo82/OZuM8FXG3oVNZhtWpMpb/0Xu5m/LfLhHZQ +7yuVI0MqZ7NE43imC8jH3IwGZlbPm0xkJYs7+2U48hXTsFSMqgGDvai0kLxyynKNT/waj+q1c1tz +GjOpPBgdCSq3UKZxCSsqFIY+O6JbAWGWcV1pwqLyj5sGqCF1xb1F3varUWqrJv6cN3PrUXzijtfZ +FshpBL3Xwr4GIPvU2N8EjrJgS1zl21rbXQMXeXc5jjFyrhpCzijSv/RQtyPSzHCFMhlME95fHglt +pRsX+dfSQjUeHAlpWzJ5iOo79Ldnaxai6bXTcGO3fp07ri7HLEmXXPlYi8bv/qVxvNcdra6m7Rlb +6JBTb5fd66VhFRjGArh2n7R1rDW4P5NOT9K0I183T2scYkeZ3q/VFyLb09U9ajzXBS8Kgkhc4mBS +kYY9cy3Vy9lUnuNJH8HGIclUilwnBtjUOH0gteGOZ4c/XNrhXLSYDyxfnD8z1pDy7rYRvDolhnbe +UMzxCZUs40s6s7UIvBnLgc0+vKuOkIXeOrDymlp+Zxra4MZLBbVrqD/jTJ597pDmnw5c4+DbyB88 +9Cg9DodYcSuMZT/114pptqc/EuTjRPvH/z5slzI3tluOEBBLqOXLOX+0I5929tO97wkvl/atCz+y +xJrdwteW2FNW/NSmBP+f/maYtVs/bYyBC7Ox3jsYZHL05CIrBa/nS+b3bHfiYm4Ueil1YZZSgAUI +fFZ1dxUmeA2oQRQ3RuGXNGLFV9/XbGFGPV6kfzk1TBBCd+izc7q1H+OHMJwmaBX2IQNYVAKHYepV +SSGCe6CnbYHHETKGNe43EDvFgZr0gB/nVHPHZ80VV1ojOiI3XDvYIkl4ayo4bxQIgrFXWTvBI0nH +VElWMuw2aLUWCRHHf8ymVCHjFlJnOSojfevCYyyyZDH0IcvHhrsnQ5O1OsWzONuVVKIxSxiFZ/tR +fKDAf6xFTnw4O9Qig2VCfW2hJQrmMOuHW0W3dLQmCMO2ccdUd/xyfflH/olTiHZVdGwb8nIwRzSE +J15jFlOJuBZBZ4CiyHyd2IFylFlB+HgHhYabhWOGwYO1ZH/Og1dtQlFMk352CGRSIFTapnWQEUtN +l4zv8S0aaCFDyGCBqDUxZYpxGHX01y/JuH1xhn7TOCnNCI4eKDs5WGX4R425F4vF1o3BJ4vO0otq +I3rimI7jJY1jISqnBxknCIvruF83mF5wN4X7qGLIhR8A2Vg0yFERSIXn9Vv3GHy3Vj/WIkKddlYi +yIMv2I/VMjTLpW7pt05SWIZR0RPyxpB4SIUM9lBPGBl0GC7oSEEwRYLe4pJpZY2P0zbI1n+Oc44w +qY3PUnmF0ixjVpDD/mJ9wpOBGTVgXlaCaZiPcIWK5NiKBIiPdGaQ0TWGvAiG7nMchdZb7Vgf8zNi +MuMyzRdy/lePe9iC4TRx7WhhOQI/QiSVNAmAa2lT/piFbuh7ofJoYSZzrSZ1bvmWw3eN2nKUPVky +uPN5/VRfohRd0VYZoqhKIlU6TXYhJxmPUIloAwc1bPmHEpaZYZORHNlXUJM07hATwHR8MJYqkwWR +WaIezFhxSFlc8/Fq82hEnpeRozg3ULhhr9lAGtVEkCg5ZNRuuVleBPaZadhG0ZgkyPmDOTOKzViM +YgOcpukKqQcbjAWS0IleQ2ROjdh6A+md1qWdBRSX7iSYgFRTtRmBpJioieXJiHfJiMGIR9fJOn8I +MSfXYhspn4ooSa2mSAj4n+8Bmg03fBJZoPOJgsVZRxu1oOMRPXYYjdqjihFaEoZpXBREanuJoRI6 +cibFinq4ngUKh/wQd/H5ofYCZ0HJXR62opZFaAT0iFIZo4DIiUojkjeqKiuoZirKo5Y1a7AWckGa +BkuYoD5lpDK6eUs6CkDqpETwl1EqpfhJpVeKpVl6EgUAADs= + +--BOUNDARY-- + +--OUTER-- diff --git a/third_party/stdlib/email/test/data/msg_14.txt b/third_party/stdlib/email/test/data/msg_14.txt new file mode 100644 index 00000000..5d98d2fd --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_14.txt @@ -0,0 +1,23 @@ +Return-Path: +Delivered-To: bbb@zzz.org +Received: by mail.zzz.org (Postfix, from userid 889) + id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) +MIME-Version: 1.0 +Content-Type: text; charset=us-ascii +Content-Transfer-Encoding: 7bit +Message-ID: <15090.61304.110929.45684@aaa.zzz.org> +From: bbb@ddd.com (John X. Doe) +To: bbb@zzz.org +Subject: This is a test message +Date: Fri, 4 May 2001 14:05:44 -0400 + + +Hi, + +I'm sorry but I'm using a drainbread ISP, which although big and +wealthy can't seem to generate standard compliant email. :( + +This message has a Content-Type: header with no subtype. I hope you +can still read it. + +-Me diff --git a/third_party/stdlib/email/test/data/msg_15.txt b/third_party/stdlib/email/test/data/msg_15.txt new file mode 100644 index 00000000..33b8487a --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_15.txt @@ -0,0 +1,52 @@ +Return-Path: +Received: from fepD.post.tele.dk (195.41.46.149) by mail.groupcare.dk (LSMTP for Windows NT v1.1b) with SMTP id <0.0014F8A2@mail.groupcare.dk>; Mon, 30 Apr 2001 12:17:50 +0200 +User-Agent: Microsoft-Outlook-Express-Macintosh-Edition/5.02.2106 +Subject: XX +From: xx@xx.dk +To: XX +Message-ID: +Mime-version: 1.0 +Content-type: multipart/mixed; + boundary="MS_Mac_OE_3071477847_720252_MIME_Part" + +> Denne meddelelse er i MIME-format. Da dit postl�sningsprogram ikke forst�r dette format, kan del af eller hele meddelelsen v�re ul�selig. + +--MS_Mac_OE_3071477847_720252_MIME_Part +Content-type: multipart/alternative; + boundary="MS_Mac_OE_3071477847_720252_MIME_Part" + + +--MS_Mac_OE_3071477847_720252_MIME_Part +Content-type: text/plain; charset="ISO-8859-1" +Content-transfer-encoding: quoted-printable + +Some removed test. + +--MS_Mac_OE_3071477847_720252_MIME_Part +Content-type: text/html; charset="ISO-8859-1" +Content-transfer-encoding: quoted-printable + + + +Some removed HTML + + +Some removed text. + + + + +--MS_Mac_OE_3071477847_720252_MIME_Part-- + + +--MS_Mac_OE_3071477847_720252_MIME_Part +Content-type: image/gif; name="xx.gif"; + x-mac-creator="6F676C65"; + x-mac-type="47494666" +Content-disposition: attachment +Content-transfer-encoding: base64 + +Some removed base64 encoded chars. + +--MS_Mac_OE_3071477847_720252_MIME_Part-- + diff --git a/third_party/stdlib/email/test/data/msg_16.txt b/third_party/stdlib/email/test/data/msg_16.txt new file mode 100644 index 00000000..56167e9f --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_16.txt @@ -0,0 +1,123 @@ +Return-Path: <> +Delivered-To: scr-admin@socal-raves.org +Received: from cougar.noc.ucla.edu (cougar.noc.ucla.edu [169.232.10.18]) + by babylon.socal-raves.org (Postfix) with ESMTP id CCC2C51B84 + for ; Sun, 23 Sep 2001 20:13:54 -0700 (PDT) +Received: from sims-ms-daemon by cougar.noc.ucla.edu + (Sun Internet Mail Server sims.3.5.2000.03.23.18.03.p10) + id <0GK500B01D0B8Y@cougar.noc.ucla.edu> for scr-admin@socal-raves.org; Sun, + 23 Sep 2001 20:14:35 -0700 (PDT) +Received: from cougar.noc.ucla.edu + (Sun Internet Mail Server sims.3.5.2000.03.23.18.03.p10) + id <0GK500B01D0B8X@cougar.noc.ucla.edu>; Sun, 23 Sep 2001 20:14:35 -0700 (PDT) +Date: Sun, 23 Sep 2001 20:14:35 -0700 (PDT) +From: Internet Mail Delivery +Subject: Delivery Notification: Delivery has failed +To: scr-admin@socal-raves.org +Message-id: <0GK500B04D0B8X@cougar.noc.ucla.edu> +MIME-version: 1.0 +Sender: scr-owner@socal-raves.org +Errors-To: scr-owner@socal-raves.org +X-BeenThere: scr@socal-raves.org +X-Mailman-Version: 2.1a3 +Precedence: bulk +List-Help: +List-Post: +List-Subscribe: , + +List-Id: SoCal-Raves +List-Unsubscribe: , + +List-Archive: +Content-Type: multipart/report; boundary="Boundary_(ID_PGS2F2a+z+/jL7hupKgRhA)" + + +--Boundary_(ID_PGS2F2a+z+/jL7hupKgRhA) +Content-type: text/plain; charset=ISO-8859-1 + +This report relates to a message you sent with the following header fields: + + Message-id: <002001c144a6$8752e060$56104586@oxy.edu> + Date: Sun, 23 Sep 2001 20:10:55 -0700 + From: "Ian T. Henry" + To: SoCal Raves + Subject: [scr] yeah for Ians!! + +Your message cannot be delivered to the following recipients: + + Recipient address: jangel1@cougar.noc.ucla.edu + Reason: recipient reached disk quota + + +--Boundary_(ID_PGS2F2a+z+/jL7hupKgRhA) +Content-type: message/DELIVERY-STATUS + +Original-envelope-id: 0GK500B4HD0888@cougar.noc.ucla.edu +Reporting-MTA: dns; cougar.noc.ucla.edu + +Action: failed +Status: 5.0.0 (recipient reached disk quota) +Original-recipient: rfc822;jangel1@cougar.noc.ucla.edu +Final-recipient: rfc822;jangel1@cougar.noc.ucla.edu + +--Boundary_(ID_PGS2F2a+z+/jL7hupKgRhA) +Content-type: MESSAGE/RFC822 + +Return-path: scr-admin@socal-raves.org +Received: from sims-ms-daemon by cougar.noc.ucla.edu + (Sun Internet Mail Server sims.3.5.2000.03.23.18.03.p10) + id <0GK500B01D0B8X@cougar.noc.ucla.edu>; Sun, 23 Sep 2001 20:14:35 -0700 (PDT) +Received: from panther.noc.ucla.edu by cougar.noc.ucla.edu + (Sun Internet Mail Server sims.3.5.2000.03.23.18.03.p10) + with ESMTP id <0GK500B4GD0888@cougar.noc.ucla.edu> for jangel1@sims-ms-daemon; + Sun, 23 Sep 2001 20:14:33 -0700 (PDT) +Received: from babylon.socal-raves.org + (ip-209-85-222-117.dreamhost.com [209.85.222.117]) + by panther.noc.ucla.edu (8.9.1a/8.9.1) with ESMTP id UAA09793 for + ; Sun, 23 Sep 2001 20:14:32 -0700 (PDT) +Received: from babylon (localhost [127.0.0.1]) by babylon.socal-raves.org + (Postfix) with ESMTP id D3B2951B70; Sun, 23 Sep 2001 20:13:47 -0700 (PDT) +Received: by babylon.socal-raves.org (Postfix, from userid 60001) + id A611F51B82; Sun, 23 Sep 2001 20:13:46 -0700 (PDT) +Received: from tiger.cc.oxy.edu (tiger.cc.oxy.edu [134.69.3.112]) + by babylon.socal-raves.org (Postfix) with ESMTP id ADA7351B70 for + ; Sun, 23 Sep 2001 20:13:44 -0700 (PDT) +Received: from ent (n16h86.dhcp.oxy.edu [134.69.16.86]) + by tiger.cc.oxy.edu (8.8.8/8.8.8) with SMTP id UAA08100 for + ; Sun, 23 Sep 2001 20:14:24 -0700 (PDT) +Date: Sun, 23 Sep 2001 20:10:55 -0700 +From: "Ian T. Henry" +Subject: [scr] yeah for Ians!! +Sender: scr-admin@socal-raves.org +To: SoCal Raves +Errors-to: scr-admin@socal-raves.org +Message-id: <002001c144a6$8752e060$56104586@oxy.edu> +MIME-version: 1.0 +X-Mailer: Microsoft Outlook Express 5.50.4522.1200 +Content-type: text/plain; charset=us-ascii +Precedence: bulk +Delivered-to: scr-post@babylon.socal-raves.org +Delivered-to: scr@socal-raves.org +X-Converted-To-Plain-Text: from multipart/alternative by demime 0.98e +X-Converted-To-Plain-Text: Alternative section used was text/plain +X-BeenThere: scr@socal-raves.org +X-Mailman-Version: 2.1a3 +List-Help: +List-Post: +List-Subscribe: , + +List-Id: SoCal-Raves +List-Unsubscribe: , + +List-Archive: + +I always love to find more Ian's that are over 3 years old!! + +Ian +_______________________________________________ +For event info, list questions, or to unsubscribe, see http://www.socal-raves.org/ + + + +--Boundary_(ID_PGS2F2a+z+/jL7hupKgRhA)-- + diff --git a/third_party/stdlib/email/test/data/msg_17.txt b/third_party/stdlib/email/test/data/msg_17.txt new file mode 100644 index 00000000..8d86e418 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_17.txt @@ -0,0 +1,12 @@ +MIME-Version: 1.0 +From: Barry +To: Dingus Lovers +Subject: Here is your dingus fish +Date: Fri, 20 Apr 2001 19:35:02 -0400 +Content-Type: multipart/mixed; boundary="BOUNDARY" + +Hi there, + +This is the dingus fish. + +[Non-text (image/gif) part of message omitted, filename dingusfish.gif] diff --git a/third_party/stdlib/email/test/data/msg_18.txt b/third_party/stdlib/email/test/data/msg_18.txt new file mode 100644 index 00000000..f9f4904d --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_18.txt @@ -0,0 +1,6 @@ +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; + spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" + diff --git a/third_party/stdlib/email/test/data/msg_19.txt b/third_party/stdlib/email/test/data/msg_19.txt new file mode 100644 index 00000000..49bf7fcc --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_19.txt @@ -0,0 +1,43 @@ +Send Ppp mailing list submissions to + ppp@zzz.org + +To subscribe or unsubscribe via the World Wide Web, visit + http://www.zzz.org/mailman/listinfo/ppp +or, via email, send a message with subject or body 'help' to + ppp-request@zzz.org + +You can reach the person managing the list at + ppp-admin@zzz.org + +When replying, please edit your Subject line so it is more specific +than "Re: Contents of Ppp digest..." + +Today's Topics: + + 1. testing #1 (Barry A. Warsaw) + 2. testing #2 (Barry A. Warsaw) + 3. testing #3 (Barry A. Warsaw) + 4. testing #4 (Barry A. Warsaw) + 5. testing #5 (Barry A. Warsaw) + +hello + + +hello + + +hello + + +hello + + +hello + + + +_______________________________________________ +Ppp mailing list +Ppp@zzz.org +http://www.zzz.org/mailman/listinfo/ppp + diff --git a/third_party/stdlib/email/test/data/msg_20.txt b/third_party/stdlib/email/test/data/msg_20.txt new file mode 100644 index 00000000..1a6a8878 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_20.txt @@ -0,0 +1,22 @@ +Return-Path: +Delivered-To: bbb@zzz.org +Received: by mail.zzz.org (Postfix, from userid 889) + id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +Message-ID: <15090.61304.110929.45684@aaa.zzz.org> +From: bbb@ddd.com (John X. Doe) +To: bbb@zzz.org +Cc: ccc@zzz.org +CC: ddd@zzz.org +cc: eee@zzz.org +Subject: This is a test message +Date: Fri, 4 May 2001 14:05:44 -0400 + + +Hi, + +Do you like this message? + +-Me diff --git a/third_party/stdlib/email/test/data/msg_21.txt b/third_party/stdlib/email/test/data/msg_21.txt new file mode 100644 index 00000000..23590b25 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_21.txt @@ -0,0 +1,20 @@ +From: aperson@dom.ain +To: bperson@dom.ain +Subject: Test +Content-Type: multipart/mixed; boundary="BOUNDARY" + +MIME message +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +One +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +Two +--BOUNDARY-- +End of MIME message diff --git a/third_party/stdlib/email/test/data/msg_22.txt b/third_party/stdlib/email/test/data/msg_22.txt new file mode 100644 index 00000000..af9de5fa --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_22.txt @@ -0,0 +1,46 @@ +Mime-Version: 1.0 +Message-Id: +Date: Tue, 16 Oct 2001 13:59:25 +0300 +To: a@example.com +From: b@example.com +Content-Type: multipart/mixed; boundary="============_-1208892523==_============" + +--============_-1208892523==_============ +Content-Type: text/plain; charset="us-ascii" ; format="flowed" + +Text text text. +--============_-1208892523==_============ +Content-Id: +Content-Type: image/jpeg; name="wibble.JPG" + ; x-mac-type="4A504547" + ; x-mac-creator="474B4F4E" +Content-Disposition: attachment; filename="wibble.JPG" +Content-Transfer-Encoding: base64 + +/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB +AQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/wAALCAXABIEBAREA +g6bCjjw/pIZSjO6FWFpldjySOmCNrO7DBZibUXhTwtCixw+GtAijVdqxxaPp0aKvmGXa +qrbBQvms0mAMeYS/3iTV1dG0hHaRNK01XblnWxtVdjkHLMIgTyqnk9VB7CrP2KzIINpa +4O7I+zxYO9WV8jZg71Zlb+8rMDkEirAVQFAUAKAFAAAUAYAUDgADgY6DjpRtXj5RxjHA +4wQRj0wQCMdCAewpaKKK/9k= +--============_-1208892523==_============ +Content-Id: +Content-Type: image/jpeg; name="wibble2.JPG" + ; x-mac-type="4A504547" + ; x-mac-creator="474B4F4E" +Content-Disposition: attachment; filename="wibble2.JPG" +Content-Transfer-Encoding: base64 + +/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB +AQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/wAALCAXABJ0BAREA +/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQA +W6NFJJBEkU10kKGTcWMDwxuU+0JHvk8qAtOpNwqSR0n8c3BlDyXHlqsUltHEiTvdXLxR +7vMiGDNJAJWkAMk8ZkCFp5G2oo5W++INrbQtNfTQxJAuXlupz9oS4d5Y1W+E2XlWZJJE +Y7LWYQxTLE1zuMbfBPxw8X2fibVdIbSbI6nLZxX635t9TjtYreWR7WGKJTLJFFKSlozO +0ShxIXM43uC3/9k= +--============_-1208892523==_============ +Content-Type: text/plain; charset="us-ascii" ; format="flowed" + +Text text text. +--============_-1208892523==_============-- + diff --git a/third_party/stdlib/email/test/data/msg_23.txt b/third_party/stdlib/email/test/data/msg_23.txt new file mode 100644 index 00000000..bb2e8ec3 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_23.txt @@ -0,0 +1,8 @@ +From: aperson@dom.ain +Content-Type: multipart/mixed; boundary="BOUNDARY" + +--BOUNDARY +Content-Type: text/plain + +A message part +--BOUNDARY-- diff --git a/third_party/stdlib/email/test/data/msg_24.txt b/third_party/stdlib/email/test/data/msg_24.txt new file mode 100644 index 00000000..4e52339e --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_24.txt @@ -0,0 +1,10 @@ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY + + +--BOUNDARY-- diff --git a/third_party/stdlib/email/test/data/msg_25.txt b/third_party/stdlib/email/test/data/msg_25.txt new file mode 100644 index 00000000..9e35275f --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_25.txt @@ -0,0 +1,117 @@ +From MAILER-DAEMON Fri Apr 06 16:46:09 2001 +Received: from [204.245.199.98] (helo=zinfandel.lacita.com) + by www.linux.org.uk with esmtp (Exim 3.13 #1) + id 14lYR6-0008Iv-00 + for linuxuser-admin@www.linux.org.uk; Fri, 06 Apr 2001 16:46:09 +0100 +Received: from localhost (localhost) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with internal id JAB03225; Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800) +Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800) +From: Mail Delivery Subsystem +Subject: Returned mail: Too many hops 19 (17 max): from via [199.164.235.226], to +Message-Id: <200104061723.JAB03225@zinfandel.lacita.com> +To: +To: postmaster@zinfandel.lacita.com +MIME-Version: 1.0 +Content-Type: multipart/report; report-type=delivery-status; + bo +Auto-Submitted: auto-generated (failure) + +This is a MIME-encapsulated message + +--JAB03225.986577786/zinfandel.lacita.com + +The original message was received at Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800) +from [199.164.235.226] + + ----- The following addresses have delivery notifications ----- + (unrecoverable error) + + ----- Transcript of session follows ----- +554 Too many hops 19 (17 max): from via [199.164.235.226], to + +--JAB03225.986577786/zinfandel.lacita.com +Content-Type: message/delivery-status + +Reporting-MTA: dns; zinfandel.lacita.com +Received-From-MTA: dns; [199.164.235.226] +Arrival-Date: Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800) + +Final-Recipient: rfc822; scoffman@wellpartner.com +Action: failed +Status: 5.4.6 +Last-Attempt-Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800) + +--JAB03225.986577786/zinfandel.lacita.com +Content-Type: text/rfc822-headers + +Return-Path: linuxuser-admin@www.linux.org.uk +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03225 for ; Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03221 for ; Fri, 6 Apr 2001 09:22:18 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03217 for ; Fri, 6 Apr 2001 09:21:37 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03213 for ; Fri, 6 Apr 2001 09:20:56 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03209 for ; Fri, 6 Apr 2001 09:20:15 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03205 for ; Fri, 6 Apr 2001 09:19:33 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03201 for ; Fri, 6 Apr 2001 09:18:52 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03197 for ; Fri, 6 Apr 2001 09:17:54 -0800 (GMT-0800) +Received: from www.linux.org.uk (parcelfarce.linux.theplanet.co.uk [195.92.249.252]) + by + fo +Received: from localhost.localdomain + ([ + by + id +Received: from [212.1.130.11] (helo=s1.uklinux.net ident=root) + by + id + fo +Received: from server (ppp-2-22.cvx4.telinco.net [212.1.149.22]) + by + fo +From: Daniel James +Organization: LinuxUser +To: linuxuser@www.linux.org.uk +X-Mailer: KMail [version 1.1.99] +Content-Type: text/plain; + c +MIME-Version: 1.0 +Message-Id: <01040616033903.00962@server> +Content-Transfer-Encoding: 8bit +Subject: [LinuxUser] bulletin no. 45 +Sender: linuxuser-admin@www.linux.org.uk +Errors-To: linuxuser-admin@www.linux.org.uk +X-BeenThere: linuxuser@www.linux.org.uk +X-Mailman-Version: 2.0.3 +Precedence: bulk +List-Help: +List-Post: +List-Subscribe: , + +List-Unsubscribe: , + +Date: Fri, 6 Apr 2001 16:03:39 +0100 + +--JAB03225.986577786/zinfandel.lacita.com-- + + diff --git a/third_party/stdlib/email/test/data/msg_26.txt b/third_party/stdlib/email/test/data/msg_26.txt new file mode 100644 index 00000000..e13203a0 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_26.txt @@ -0,0 +1,45 @@ +Received: from xcar [192.168.0.2] by jeeves.wooster.local + (SMTPD32-7.07 EVAL) id AFF92F0214; Sun, 12 May 2002 08:55:37 +0100 +Date: Sun, 12 May 2002 08:56:15 +0100 +From: Father Time +To: timbo@jeeves.wooster.local +Subject: IMAP file test +Message-ID: <6df65d354b.father.time@rpc.wooster.local> +X-Organization: Home +User-Agent: Messenger-Pro/2.50a (MsgServe/1.50) (RISC-OS/4.02) POPstar/2.03 +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="1618492860--2051301190--113853680" +Status: R +X-UIDL: 319998302 + +This message is in MIME format which your mailer apparently does not support. +You either require a newer version of your software which supports MIME, or +a separate MIME decoding utility. Alternatively, ask the sender of this +message to resend it in a different format. + +--1618492860--2051301190--113853680 +Content-Type: text/plain; charset=us-ascii + +Simple email with attachment. + + +--1618492860--2051301190--113853680 +Content-Type: application/riscos; name="clock.bmp,69c"; type=BMP; load=&fff69c4b; exec=&355dd4d1; access=&03 +Content-Disposition: attachment; filename="clock.bmp" +Content-Transfer-Encoding: base64 + +Qk12AgAAAAAAAHYAAAAoAAAAIAAAACAAAAABAAQAAAAAAAAAAADXDQAA1w0AAAAAAAAA +AAAAAAAAAAAAiAAAiAAAAIiIAIgAAACIAIgAiIgAALu7uwCIiIgAERHdACLuIgAz//8A +zAAAAN0R3QDu7iIA////AAAAAAAAAAAAAAAAAAAAAAAAAAi3AAAAAAAAADeAAAAAAAAA +C3ADMzMzMANwAAAAAAAAAAAHMAAAAANwAAAAAAAAAACAMAd3zPfwAwgAAAAAAAAIAwd/ +f8x/f3AwgAAAAAAAgDB0x/f3//zPAwgAAAAAAAcHfM9////8z/AwAAAAAAiwd/f3//// +////A4AAAAAAcEx/f///////zAMAAAAAiwfM9////3///8zwOAAAAAcHf3////B///// +8DAAAAALB/f3///wd3d3//AwAAAABwTPf//wCQAAD/zAMAAAAAsEx/f///B////8wDAA +AAAHB39////wf/////AwAAAACwf39///8H/////wMAAAAIcHfM9///B////M8DgAAAAA +sHTH///wf///xAMAAAAACHB3f3//8H////cDgAAAAAALB3zH//D//M9wMAAAAAAAgLB0 +z39///xHAwgAAAAAAAgLB3d3RHd3cDCAAAAAAAAAgLAHd0R3cAMIAAAAAAAAgAgLcAAA +AAMwgAgAAAAACDAAAAu7t7cwAAgDgAAAAABzcIAAAAAAAAgDMwAAAAAAN7uwgAAAAAgH +MzMAAAAACH97tzAAAAALu3c3gAAAAAAL+7tzDABAu7f7cAAAAAAACA+3MA7EQAv/sIAA +AAAAAAAIAAAAAAAAAIAAAAAA + +--1618492860--2051301190--113853680-- \ No newline at end of file diff --git a/third_party/stdlib/email/test/data/msg_27.txt b/third_party/stdlib/email/test/data/msg_27.txt new file mode 100644 index 00000000..d0191769 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_27.txt @@ -0,0 +1,15 @@ +Return-Path: +Received: by mail.dom.ain (Postfix, from userid 889) + id B9D0AD35DB; Tue, 4 Jun 2002 21:46:59 -0400 (EDT) +Message-ID: <15613.28051.707126.569693@dom.ain> +Date: Tue, 4 Jun 2002 21:46:59 -0400 +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +Subject: bug demonstration + 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 + more text +From: aperson@dom.ain (Anne P. Erson) +To: bperson@dom.ain (Barney P. Erson) + +test diff --git a/third_party/stdlib/email/test/data/msg_28.txt b/third_party/stdlib/email/test/data/msg_28.txt new file mode 100644 index 00000000..1e4824ca --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_28.txt @@ -0,0 +1,25 @@ +From: aperson@dom.ain +MIME-Version: 1.0 +Content-Type: multipart/digest; boundary=BOUNDARY + +--BOUNDARY +Content-Type: message/rfc822 + +Content-Type: text/plain; charset=us-ascii +To: aa@bb.org +From: cc@dd.org +Subject: ee + +message 1 + +--BOUNDARY +Content-Type: message/rfc822 + +Content-Type: text/plain; charset=us-ascii +To: aa@bb.org +From: cc@dd.org +Subject: ee + +message 2 + +--BOUNDARY-- diff --git a/third_party/stdlib/email/test/data/msg_29.txt b/third_party/stdlib/email/test/data/msg_29.txt new file mode 100644 index 00000000..1fab5616 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_29.txt @@ -0,0 +1,22 @@ +Return-Path: +Delivered-To: bbb@zzz.org +Received: by mail.zzz.org (Postfix, from userid 889) + id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii; + title*0*="us-ascii'en'This%20is%20even%20more%20"; + title*1*="%2A%2A%2Afun%2A%2A%2A%20"; + title*2="isn't it!" +Content-Transfer-Encoding: 7bit +Message-ID: <15090.61304.110929.45684@aaa.zzz.org> +From: bbb@ddd.com (John X. Doe) +To: bbb@zzz.org +Subject: This is a test message +Date: Fri, 4 May 2001 14:05:44 -0400 + + +Hi, + +Do you like this message? + +-Me diff --git a/third_party/stdlib/email/test/data/msg_30.txt b/third_party/stdlib/email/test/data/msg_30.txt new file mode 100644 index 00000000..4334bb6e --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_30.txt @@ -0,0 +1,23 @@ +From: aperson@dom.ain +MIME-Version: 1.0 +Content-Type: multipart/digest; boundary=BOUNDARY + +--BOUNDARY + +Content-Type: text/plain; charset=us-ascii +To: aa@bb.org +From: cc@dd.org +Subject: ee + +message 1 + +--BOUNDARY + +Content-Type: text/plain; charset=us-ascii +To: aa@bb.org +From: cc@dd.org +Subject: ee + +message 2 + +--BOUNDARY-- diff --git a/third_party/stdlib/email/test/data/msg_31.txt b/third_party/stdlib/email/test/data/msg_31.txt new file mode 100644 index 00000000..1e58e56c --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_31.txt @@ -0,0 +1,15 @@ +From: aperson@dom.ain +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary=BOUNDARY_ + +--BOUNDARY +Content-Type: text/plain + +message 1 + +--BOUNDARY +Content-Type: text/plain + +message 2 + +--BOUNDARY-- diff --git a/third_party/stdlib/email/test/data/msg_32.txt b/third_party/stdlib/email/test/data/msg_32.txt new file mode 100644 index 00000000..07ec5af9 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_32.txt @@ -0,0 +1,14 @@ +Delivered-To: freebsd-isp@freebsd.org +Date: Tue, 26 Sep 2000 12:23:03 -0500 +From: Anne Person +To: Barney Dude +Subject: Re: Limiting Perl CPU Utilization... +Mime-Version: 1.0 +Content-Type: text/plain; charset*=ansi-x3.4-1968''us-ascii +Content-Disposition: inline +User-Agent: Mutt/1.3.8i +Sender: owner-freebsd-isp@FreeBSD.ORG +Precedence: bulk +X-Loop: FreeBSD.org + +Some message. diff --git a/third_party/stdlib/email/test/data/msg_33.txt b/third_party/stdlib/email/test/data/msg_33.txt new file mode 100644 index 00000000..042787a4 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_33.txt @@ -0,0 +1,29 @@ +Delivered-To: freebsd-isp@freebsd.org +Date: Wed, 27 Sep 2000 11:11:09 -0500 +From: Anne Person +To: Barney Dude +Subject: Re: Limiting Perl CPU Utilization... +Mime-Version: 1.0 +Content-Type: multipart/signed; micalg*=ansi-x3.4-1968''pgp-md5; + protocol*=ansi-x3.4-1968''application%2Fpgp-signature; + boundary*="ansi-x3.4-1968''EeQfGwPcQSOJBaQU" +Content-Disposition: inline +Sender: owner-freebsd-isp@FreeBSD.ORG +Precedence: bulk +X-Loop: FreeBSD.org + + +--EeQfGwPcQSOJBaQU +Content-Type: text/plain; charset*=ansi-x3.4-1968''us-ascii +Content-Disposition: inline +Content-Transfer-Encoding: quoted-printable + +part 1 + +--EeQfGwPcQSOJBaQU +Content-Type: text/plain +Content-Disposition: inline + +part 2 + +--EeQfGwPcQSOJBaQU-- diff --git a/third_party/stdlib/email/test/data/msg_34.txt b/third_party/stdlib/email/test/data/msg_34.txt new file mode 100644 index 00000000..055dfea5 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_34.txt @@ -0,0 +1,19 @@ +From: aperson@dom.ain +To: bperson@dom.ain +Content-Type: multipart/digest; boundary=XYZ + +--XYZ +Content-Type: text/plain + + +This is a text plain part that is counter to recommended practice in +RFC 2046, $5.1.5, but is not illegal + +--XYZ + +From: cperson@dom.ain +To: dperson@dom.ain + +A submessage + +--XYZ-- diff --git a/third_party/stdlib/email/test/data/msg_35.txt b/third_party/stdlib/email/test/data/msg_35.txt new file mode 100644 index 00000000..be7d5a2f --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_35.txt @@ -0,0 +1,4 @@ +From: aperson@dom.ain +To: bperson@dom.ain +Subject: here's something interesting +counter to RFC 2822, there's no separating newline here diff --git a/third_party/stdlib/email/test/data/msg_36.txt b/third_party/stdlib/email/test/data/msg_36.txt new file mode 100644 index 00000000..5632c306 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_36.txt @@ -0,0 +1,40 @@ +Mime-Version: 1.0 +Content-Type: Multipart/Mixed; Boundary="NextPart" +To: IETF-Announce:; +From: Internet-Drafts@ietf.org +Subject: I-D ACTION:draft-ietf-mboned-mix-00.txt +Date: Tue, 22 Dec 1998 16:55:06 -0500 + +--NextPart + +Blah blah blah + +--NextPart +Content-Type: Multipart/Alternative; Boundary="OtherAccess" + +--OtherAccess +Content-Type: Message/External-body; + access-type="mail-server"; + server="mailserv@ietf.org" + +Content-Type: text/plain +Content-ID: <19981222151406.I-D@ietf.org> + +ENCODING mime +FILE /internet-drafts/draft-ietf-mboned-mix-00.txt + +--OtherAccess +Content-Type: Message/External-body; + name="draft-ietf-mboned-mix-00.txt"; + site="ftp.ietf.org"; + access-type="anon-ftp"; + directory="internet-drafts" + +Content-Type: text/plain +Content-ID: <19981222151406.I-D@ietf.org> + + +--OtherAccess-- + +--NextPart-- + diff --git a/third_party/stdlib/email/test/data/msg_37.txt b/third_party/stdlib/email/test/data/msg_37.txt new file mode 100644 index 00000000..038d34a1 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_37.txt @@ -0,0 +1,22 @@ +Content-Type: multipart/mixed; boundary=ABCDE + +--ABCDE +Content-Type: text/x-one + +Blah + +--ABCDE +--ABCDE +Content-Type: text/x-two + +Blah + +--ABCDE +--ABCDE +--ABCDE +--ABCDE +Content-Type: text/x-two + +Blah + +--ABCDE-- diff --git a/third_party/stdlib/email/test/data/msg_38.txt b/third_party/stdlib/email/test/data/msg_38.txt new file mode 100644 index 00000000..006df81c --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_38.txt @@ -0,0 +1,101 @@ +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0" + +------- =_aaaaaaaaaa0 +Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa1" +Content-ID: <20592.1022586929.1@example.com> + +------- =_aaaaaaaaaa1 +Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa2" +Content-ID: <20592.1022586929.2@example.com> + +------- =_aaaaaaaaaa2 +Content-Type: text/plain +Content-ID: <20592.1022586929.3@example.com> +Content-Description: very tricky +Content-Transfer-Encoding: 7bit + + +Unlike the test test_nested-multiples-with-internal-boundary, this +piece of text not only contains the outer boundary tags +------- =_aaaaaaaaaa1 +and +------- =_aaaaaaaaaa0 +but puts them at the start of a line! And, to be even nastier, it +even includes a couple of end tags, such as this one: + +------- =_aaaaaaaaaa1-- + +and this one, which is from a multipart we haven't even seen yet! + +------- =_aaaaaaaaaa4-- + +This will, I'm sure, cause much breakage of MIME parsers. But, as +far as I can tell, it's perfectly legal. I have not yet ever seen +a case of this in the wild, but I've seen *similar* things. + + +------- =_aaaaaaaaaa2 +Content-Type: application/octet-stream +Content-ID: <20592.1022586929.4@example.com> +Content-Description: patch2 +Content-Transfer-Encoding: base64 + +XXX + +------- =_aaaaaaaaaa2-- + +------- =_aaaaaaaaaa1 +Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa3" +Content-ID: <20592.1022586929.6@example.com> + +------- =_aaaaaaaaaa3 +Content-Type: application/octet-stream +Content-ID: <20592.1022586929.7@example.com> +Content-Description: patch3 +Content-Transfer-Encoding: base64 + +XXX + +------- =_aaaaaaaaaa3 +Content-Type: application/octet-stream +Content-ID: <20592.1022586929.8@example.com> +Content-Description: patch4 +Content-Transfer-Encoding: base64 + +XXX + +------- =_aaaaaaaaaa3-- + +------- =_aaaaaaaaaa1 +Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa4" +Content-ID: <20592.1022586929.10@example.com> + +------- =_aaaaaaaaaa4 +Content-Type: application/octet-stream +Content-ID: <20592.1022586929.11@example.com> +Content-Description: patch5 +Content-Transfer-Encoding: base64 + +XXX + +------- =_aaaaaaaaaa4 +Content-Type: application/octet-stream +Content-ID: <20592.1022586929.12@example.com> +Content-Description: patch6 +Content-Transfer-Encoding: base64 + +XXX + +------- =_aaaaaaaaaa4-- + +------- =_aaaaaaaaaa1-- + +------- =_aaaaaaaaaa0 +Content-Type: text/plain; charset="us-ascii" +Content-ID: <20592.1022586929.15@example.com> + +-- +It's never too late to have a happy childhood. + +------- =_aaaaaaaaaa0-- diff --git a/third_party/stdlib/email/test/data/msg_39.txt b/third_party/stdlib/email/test/data/msg_39.txt new file mode 100644 index 00000000..124b2691 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_39.txt @@ -0,0 +1,83 @@ +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0" + +------- =_aaaaaaaaaa0 +Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa1" +Content-ID: <20592.1022586929.1@example.com> + +------- =_aaaaaaaaaa1 +Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1" +Content-ID: <20592.1022586929.2@example.com> + +------- =_aaaaaaaaaa1 +Content-Type: application/octet-stream +Content-ID: <20592.1022586929.3@example.com> +Content-Description: patch1 +Content-Transfer-Encoding: base64 + +XXX + +------- =_aaaaaaaaaa1 +Content-Type: application/octet-stream +Content-ID: <20592.1022586929.4@example.com> +Content-Description: patch2 +Content-Transfer-Encoding: base64 + +XXX + +------- =_aaaaaaaaaa1-- + +------- =_aaaaaaaaaa1 +Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1" +Content-ID: <20592.1022586929.6@example.com> + +------- =_aaaaaaaaaa1 +Content-Type: application/octet-stream +Content-ID: <20592.1022586929.7@example.com> +Content-Description: patch3 +Content-Transfer-Encoding: base64 + +XXX + +------- =_aaaaaaaaaa1 +Content-Type: application/octet-stream +Content-ID: <20592.1022586929.8@example.com> +Content-Description: patch4 +Content-Transfer-Encoding: base64 + +XXX + +------- =_aaaaaaaaaa1-- + +------- =_aaaaaaaaaa1 +Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1" +Content-ID: <20592.1022586929.10@example.com> + +------- =_aaaaaaaaaa1 +Content-Type: application/octet-stream +Content-ID: <20592.1022586929.11@example.com> +Content-Description: patch5 +Content-Transfer-Encoding: base64 + +XXX + +------- =_aaaaaaaaaa1 +Content-Type: application/octet-stream +Content-ID: <20592.1022586929.12@example.com> +Content-Description: patch6 +Content-Transfer-Encoding: base64 + +XXX + +------- =_aaaaaaaaaa1-- + +------- =_aaaaaaaaaa1-- + +------- =_aaaaaaaaaa0 +Content-Type: text/plain; charset="us-ascii" +Content-ID: <20592.1022586929.15@example.com> + +-- +It's never too late to have a happy childhood. + +------- =_aaaaaaaaaa0-- diff --git a/third_party/stdlib/email/test/data/msg_40.txt b/third_party/stdlib/email/test/data/msg_40.txt new file mode 100644 index 00000000..1435fa1e --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_40.txt @@ -0,0 +1,10 @@ +MIME-Version: 1.0 +Content-Type: text/html; boundary="--961284236552522269" + +----961284236552522269 +Content-Type: text/html; +Content-Transfer-Encoding: 7Bit + + + +----961284236552522269-- diff --git a/third_party/stdlib/email/test/data/msg_41.txt b/third_party/stdlib/email/test/data/msg_41.txt new file mode 100644 index 00000000..76cdd1cb --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_41.txt @@ -0,0 +1,8 @@ +From: "Allison Dunlap" +To: yyy@example.com +Subject: 64423 +Date: Sun, 11 Jul 2004 16:09:27 -0300 +MIME-Version: 1.0 +Content-Type: multipart/alternative; + +Blah blah blah diff --git a/third_party/stdlib/email/test/data/msg_42.txt b/third_party/stdlib/email/test/data/msg_42.txt new file mode 100644 index 00000000..a75f8f4a --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_42.txt @@ -0,0 +1,20 @@ +Content-Type: multipart/mixed; boundary="AAA" +From: Mail Delivery Subsystem +To: yyy@example.com + +This is a MIME-encapsulated message + +--AAA + +Stuff + +--AAA +Content-Type: message/rfc822 + +From: webmaster@python.org +To: zzz@example.com +Content-Type: multipart/mixed; boundary="BBB" + +--BBB-- + +--AAA-- diff --git a/third_party/stdlib/email/test/data/msg_43.txt b/third_party/stdlib/email/test/data/msg_43.txt new file mode 100644 index 00000000..797d12c5 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_43.txt @@ -0,0 +1,217 @@ +From SRS0=aO/p=ON=bag.python.org=None@bounce2.pobox.com Fri Nov 26 21:40:36 2004 +X-VM-v5-Data: ([nil nil nil nil nil nil nil nil nil] + [nil nil nil nil nil nil nil "MAILER DAEMON <>" "MAILER DAEMON <>" nil nil "Banned file: auto__mail.python.bat in mail from you" "^From:" nil nil nil nil "Banned file: auto__mail.python.bat in mail from you" nil nil nil nil nil nil nil] + nil) +MIME-Version: 1.0 +Message-Id: +Content-Type: multipart/report; report-type=delivery-status; + charset=utf-8; + boundary="----------=_1101526904-1956-5" +X-Virus-Scanned: by XS4ALL Virus Scanner +X-UIDL: 4\G!!! +To: +Subject: Banned file: auto__mail.python.bat in mail from you +Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +This is a multi-part message in MIME format... + +------------=_1101526904-1956-5 +Content-Type: text/plain; charset="utf-8" +Content-Disposition: inline +Content-Transfer-Encoding: 7bit + +BANNED FILENAME ALERT + +Your message to: xxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxxxxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxx@dot.ca.gov, xxxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxx@dot.ca.gov, xxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxx@dot.ca.gov, xxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxxxxxxxx@dot.ca.gov +was blocked by our Spam Firewall. The email you sent with the following subject has NOT BEEN DELIVERED: + +Subject: Delivery_failure_notice + +An attachment in that mail was of a file type that the Spam Firewall is set to block. + + + +------------=_1101526904-1956-5 +Content-Type: message/delivery-status +Content-Disposition: inline +Content-Transfer-Encoding: 7bit +Content-Description: Delivery error report + +Reporting-MTA: dns; sacspam01.dot.ca.gov +Received-From-MTA: smtp; sacspam01.dot.ca.gov ([127.0.0.1]) +Arrival-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +Final-Recipient: rfc822; xxxxxxx@dot.ca.gov +Action: failed +Status: 5.7.1 +Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat +Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) + +------------=_1101526904-1956-5 +Content-Type: text/rfc822-headers +Content-Disposition: inline +Content-Transfer-Encoding: 7bit +Content-Description: Undelivered-message headers + +Received: from kgsav.org (ppp-70-242-162-63.dsl.spfdmo.swbell.net [70.242.162.63]) + by sacspam01.dot.ca.gov (Spam Firewall) with SMTP + id A232AD03DE3A; Fri, 26 Nov 2004 19:41:35 -0800 (PST) +From: webmaster@python.org +To: xxxxx@dot.ca.gov +Date: Sat, 27 Nov 2004 03:35:30 UTC +Subject: Delivery_failure_notice +Importance: Normal +X-Priority: 3 (Normal) +X-MSMail-Priority: Normal +Message-ID: +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="====67bd2b7a5.f99f7" +Content-Transfer-Encoding: 7bit + +------------=_1101526904-1956-5-- + diff --git a/third_party/stdlib/email/test/data/msg_44.txt b/third_party/stdlib/email/test/data/msg_44.txt new file mode 100644 index 00000000..15a22528 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_44.txt @@ -0,0 +1,33 @@ +Return-Path: +Delivered-To: barry@python.org +Received: by mail.python.org (Postfix, from userid 889) + id C2BF0D37C6; Tue, 11 Sep 2001 00:05:05 -0400 (EDT) +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="h90VIIIKmx" +Content-Transfer-Encoding: 7bit +Message-ID: <15261.36209.358846.118674@anthem.python.org> +From: barry@python.org (Barry A. Warsaw) +To: barry@python.org +Subject: a simple multipart +Date: Tue, 11 Sep 2001 00:05:05 -0400 +X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid +X-Attribution: BAW +X-Oblique-Strategy: Make a door into a window + + +--h90VIIIKmx +Content-Type: text/plain; name="msg.txt" +Content-Transfer-Encoding: 7bit + +a simple kind of mirror +to reflect upon our own + +--h90VIIIKmx +Content-Type: text/plain; name="msg.txt" +Content-Transfer-Encoding: 7bit + +a simple kind of mirror +to reflect upon our own + +--h90VIIIKmx-- + diff --git a/third_party/stdlib/email/test/data/msg_45.txt b/third_party/stdlib/email/test/data/msg_45.txt new file mode 100644 index 00000000..58fde956 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_45.txt @@ -0,0 +1,33 @@ +From: +To: +Subject: test +X-Long-Line: Some really long line contains a lot of text and thus has to be rewrapped because it is some + really long + line +MIME-Version: 1.0 +Content-Type: multipart/signed; boundary="borderline"; + protocol="application/pgp-signature"; micalg=pgp-sha1 + +This is an OpenPGP/MIME signed message (RFC 2440 and 3156) +--borderline +Content-Type: text/plain +X-Long-Line: Another really long line contains a lot of text and thus has to be rewrapped because it is another + really long + line + +This is the signed contents. + +--borderline +Content-Type: application/pgp-signature; name="signature.asc" +Content-Description: OpenPGP digital signature +Content-Disposition: attachment; filename="signature.asc" + +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v2.0.6 (GNU/Linux) + +iD8DBQFG03voRhp6o4m9dFsRApSZAKCCAN3IkJlVRg6NvAiMHlvvIuMGPQCeLZtj +FGwfnRHFBFO/S4/DKysm0lI= +=t7+s +-----END PGP SIGNATURE----- + +--borderline-- diff --git a/third_party/stdlib/email/test/data/msg_46.txt b/third_party/stdlib/email/test/data/msg_46.txt new file mode 100644 index 00000000..1e22c4f6 --- /dev/null +++ b/third_party/stdlib/email/test/data/msg_46.txt @@ -0,0 +1,23 @@ +Return-Path: +Delivery-Date: Mon, 08 Feb 2010 14:05:16 +0100 +Received: from example.org (example.org [64.5.53.58]) + by example.net (node=mxbap2) with ESMTP (Nemesis) + id UNIQUE for someone@example.com; Mon, 08 Feb 2010 14:05:16 +0100 +Date: Mon, 01 Feb 2010 12:21:16 +0100 +From: "Sender" +To: +Subject: GroupwiseForwardingTest +Mime-Version: 1.0 +Content-Type: message/rfc822 + +Return-path: +Message-ID: <4B66B890.4070408@teconcept.de> +Date: Mon, 01 Feb 2010 12:18:40 +0100 +From: "Dr. Sender" +MIME-Version: 1.0 +To: "Recipient" +Subject: GroupwiseForwardingTest +Content-Type: text/plain; charset=ISO-8859-15 +Content-Transfer-Encoding: 7bit + +Testing email forwarding with Groupwise 1.2.2010 diff --git a/third_party/stdlib/email/test/test_email.py b/third_party/stdlib/email/test/test_email.py new file mode 100644 index 00000000..160306c5 --- /dev/null +++ b/third_party/stdlib/email/test/test_email.py @@ -0,0 +1,3622 @@ +# Copyright (C) 2001-2010 Python Software Foundation +# Contact: email-sig@python.org +# email package unit tests + +import os +import sys +import time +import base64 +import difflib +import unittest +import warnings +import textwrap +from cStringIO import StringIO +from random import choice +try: + from threading import Thread +except ImportError: + from dummy_threading import Thread + +import email + +from email.Charset import Charset +from email.Header import Header, decode_header, make_header +from email.Parser import Parser, HeaderParser +from email.Generator import Generator, DecodedGenerator +from email.Message import Message +from email.MIMEAudio import MIMEAudio +from email.MIMEText import MIMEText +from email.MIMEImage import MIMEImage +from email.MIMEBase import MIMEBase +from email.MIMEMessage import MIMEMessage +from email.MIMEMultipart import MIMEMultipart +from email import Utils +from email import Errors +from email import Encoders +from email import Iterators +from email import base64MIME +from email import quopriMIME + +from test.test_support import findfile, run_unittest, start_threads +from email.test import __file__ as landmark + + +NL = '\n' +EMPTYSTRING = '' +SPACE = ' ' + + + +def openfile(filename, mode='r'): + path = os.path.join(os.path.dirname(landmark), 'data', filename) + return open(path, mode) + + + +# Base test class +class TestEmailBase(unittest.TestCase): + def ndiffAssertEqual(self, first, second): + """Like assertEqual except use ndiff for readable output.""" + if first != second: + sfirst = str(first) + ssecond = str(second) + diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines()) + fp = StringIO() + print >> fp, NL, NL.join(diff) + raise self.failureException, fp.getvalue() + + def _msgobj(self, filename): + fp = openfile(findfile(filename)) + try: + msg = email.message_from_file(fp) + finally: + fp.close() + return msg + + + +# Test various aspects of the Message class's API +class TestMessageAPI(TestEmailBase): + def test_get_all(self): + eq = self.assertEqual + msg = self._msgobj('msg_20.txt') + eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org']) + eq(msg.get_all('xx', 'n/a'), 'n/a') + + def test_getset_charset(self): + eq = self.assertEqual + msg = Message() + eq(msg.get_charset(), None) + charset = Charset('iso-8859-1') + msg.set_charset(charset) + eq(msg['mime-version'], '1.0') + eq(msg.get_content_type(), 'text/plain') + eq(msg['content-type'], 'text/plain; charset="iso-8859-1"') + eq(msg.get_param('charset'), 'iso-8859-1') + eq(msg['content-transfer-encoding'], 'quoted-printable') + eq(msg.get_charset().input_charset, 'iso-8859-1') + # Remove the charset + msg.set_charset(None) + eq(msg.get_charset(), None) + eq(msg['content-type'], 'text/plain') + # Try adding a charset when there's already MIME headers present + msg = Message() + msg['MIME-Version'] = '2.0' + msg['Content-Type'] = 'text/x-weird' + msg['Content-Transfer-Encoding'] = 'quinted-puntable' + msg.set_charset(charset) + eq(msg['mime-version'], '2.0') + eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"') + eq(msg['content-transfer-encoding'], 'quinted-puntable') + + def test_set_charset_from_string(self): + eq = self.assertEqual + msg = Message() + msg.set_charset('us-ascii') + eq(msg.get_charset().input_charset, 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + + def test_set_payload_with_charset(self): + msg = Message() + charset = Charset('iso-8859-1') + msg.set_payload('This is a string payload', charset) + self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1') + + def test_get_charsets(self): + eq = self.assertEqual + + msg = self._msgobj('msg_08.txt') + charsets = msg.get_charsets() + eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r']) + + msg = self._msgobj('msg_09.txt') + charsets = msg.get_charsets('dingbat') + eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat', + 'koi8-r']) + + msg = self._msgobj('msg_12.txt') + charsets = msg.get_charsets() + eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2', + 'iso-8859-3', 'us-ascii', 'koi8-r']) + + def test_get_filename(self): + eq = self.assertEqual + + msg = self._msgobj('msg_04.txt') + filenames = [p.get_filename() for p in msg.get_payload()] + eq(filenames, ['msg.txt', 'msg.txt']) + + msg = self._msgobj('msg_07.txt') + subpart = msg.get_payload(1) + eq(subpart.get_filename(), 'dingusfish.gif') + + def test_get_filename_with_name_parameter(self): + eq = self.assertEqual + + msg = self._msgobj('msg_44.txt') + filenames = [p.get_filename() for p in msg.get_payload()] + eq(filenames, ['msg.txt', 'msg.txt']) + + def test_get_boundary(self): + eq = self.assertEqual + msg = self._msgobj('msg_07.txt') + # No quotes! + eq(msg.get_boundary(), 'BOUNDARY') + + def test_set_boundary(self): + eq = self.assertEqual + # This one has no existing boundary parameter, but the Content-Type: + # header appears fifth. + msg = self._msgobj('msg_01.txt') + msg.set_boundary('BOUNDARY') + header, value = msg.items()[4] + eq(header.lower(), 'content-type') + eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"') + # This one has a Content-Type: header, with a boundary, stuck in the + # middle of its headers. Make sure the order is preserved; it should + # be fifth. + msg = self._msgobj('msg_04.txt') + msg.set_boundary('BOUNDARY') + header, value = msg.items()[4] + eq(header.lower(), 'content-type') + eq(value, 'multipart/mixed; boundary="BOUNDARY"') + # And this one has no Content-Type: header at all. + msg = self._msgobj('msg_03.txt') + self.assertRaises(Errors.HeaderParseError, + msg.set_boundary, 'BOUNDARY') + + def test_make_boundary(self): + msg = MIMEMultipart('form-data') + # Note that when the boundary gets created is an implementation + # detail and might change. + self.assertEqual(msg.items()[0][1], 'multipart/form-data') + # Trigger creation of boundary + msg.as_string() + self.assertEqual(msg.items()[0][1][:33], + 'multipart/form-data; boundary="==') + # XXX: there ought to be tests of the uniqueness of the boundary, too. + + def test_message_rfc822_only(self): + # Issue 7970: message/rfc822 not in multipart parsed by + # HeaderParser caused an exception when flattened. + fp = openfile(findfile('msg_46.txt')) + msgdata = fp.read() + parser = email.Parser.HeaderParser() + msg = parser.parsestr(msgdata) + out = StringIO() + gen = email.Generator.Generator(out, True, 0) + gen.flatten(msg, False) + self.assertEqual(out.getvalue(), msgdata) + + def test_get_decoded_payload(self): + eq = self.assertEqual + msg = self._msgobj('msg_10.txt') + # The outer message is a multipart + eq(msg.get_payload(decode=True), None) + # Subpart 1 is 7bit encoded + eq(msg.get_payload(0).get_payload(decode=True), + 'This is a 7bit encoded message.\n') + # Subpart 2 is quopri + eq(msg.get_payload(1).get_payload(decode=True), + '\xa1This is a Quoted Printable encoded message!\n') + # Subpart 3 is base64 + eq(msg.get_payload(2).get_payload(decode=True), + 'This is a Base64 encoded message.') + # Subpart 4 is base64 with a trailing newline, which + # used to be stripped (issue 7143). + eq(msg.get_payload(3).get_payload(decode=True), + 'This is a Base64 encoded message.\n') + # Subpart 5 has no Content-Transfer-Encoding: header. + eq(msg.get_payload(4).get_payload(decode=True), + 'This has no Content-Transfer-Encoding: header.\n') + + def test_get_decoded_uu_payload(self): + eq = self.assertEqual + msg = Message() + msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n') + for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): + msg['content-transfer-encoding'] = cte + eq(msg.get_payload(decode=True), 'hello world') + # Now try some bogus data + msg.set_payload('foo') + eq(msg.get_payload(decode=True), 'foo') + + def test_decode_bogus_uu_payload_quietly(self): + msg = Message() + msg.set_payload('begin 664 foo.txt\n%' % i for i in range(10)]) + msg.set_payload('Test') + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), """\ +From: test@dom.ain +References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> + <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain> + +Test""") + + def test_no_split_long_header(self): + eq = self.ndiffAssertEqual + hstr = 'References: ' + 'x' * 80 + h = Header(hstr, continuation_ws='\t') + eq(h.encode(), """\ +References: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") + + def test_splitting_multiple_long_lines(self): + eq = self.ndiffAssertEqual + hstr = """\ +from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for ; Sat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for ; Sat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for ; Sat, 2 Feb 2002 17:00:06 -0800 (PST) +""" + h = Header(hstr, continuation_ws='\t') + eq(h.encode(), """\ +from babylon.socal-raves.org (localhost [127.0.0.1]); +\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; +\tfor ; +\tSat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); +\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; +\tfor ; +\tSat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); +\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; +\tfor ; +\tSat, 2 Feb 2002 17:00:06 -0800 (PST)""") + + def test_splitting_first_line_only_is_long(self): + eq = self.ndiffAssertEqual + hstr = """\ +from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) +\tby kronos.mems-exchange.org with esmtp (Exim 4.05) +\tid 17k4h5-00034i-00 +\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""" + h = Header(hstr, maxlinelen=78, header_name='Received', + continuation_ws='\t') + eq(h.encode(), """\ +from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] +\thelo=cthulhu.gerg.ca) +\tby kronos.mems-exchange.org with esmtp (Exim 4.05) +\tid 17k4h5-00034i-00 +\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""") + + def test_long_8bit_header(self): + eq = self.ndiffAssertEqual + msg = Message() + h = Header('Britische Regierung gibt', 'iso-8859-1', + header_name='Subject') + h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') + msg['Subject'] = h + eq(msg.as_string(), """\ +Subject: =?iso-8859-1?q?Britische_Regierung_gibt?= =?iso-8859-1?q?gr=FCnes?= + =?iso-8859-1?q?_Licht_f=FCr_Offshore-Windkraftprojekte?= + +""") + + def test_long_8bit_header_no_charset(self): + eq = self.ndiffAssertEqual + msg = Message() + msg['Reply-To'] = 'Britische Regierung gibt gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte ' + eq(msg.as_string(), """\ +Reply-To: Britische Regierung gibt gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte + +""") + + def test_long_to_header(self): + eq = self.ndiffAssertEqual + to = '"Someone Test #A" ,,"Someone Test #B" , "Someone Test #C" , "Someone Test #D" ' + msg = Message() + msg['To'] = to + eq(msg.as_string(0), '''\ +To: "Someone Test #A" , , + "Someone Test #B" , + "Someone Test #C" , + "Someone Test #D" + +''') + + def test_long_line_after_append(self): + eq = self.ndiffAssertEqual + s = 'This is an example of string which has almost the limit of header length.' + h = Header(s) + h.append('Add another line.') + eq(h.encode(), """\ +This is an example of string which has almost the limit of header length. + Add another line.""") + + def test_shorter_line_with_append(self): + eq = self.ndiffAssertEqual + s = 'This is a shorter line.' + h = Header(s) + h.append('Add another sentence. (Surprise?)') + eq(h.encode(), + 'This is a shorter line. Add another sentence. (Surprise?)') + + def test_long_field_name(self): + eq = self.ndiffAssertEqual + fn = 'X-Very-Very-Very-Long-Header-Name' + gs = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. " + h = Header(gs, 'iso-8859-1', header_name=fn) + # BAW: this seems broken because the first line is too long + eq(h.encode(), """\ +=?iso-8859-1?q?Die_Mieter_treten_hier_?= + =?iso-8859-1?q?ein_werden_mit_einem_Foerderband_komfortabel_den_Korridor_?= + =?iso-8859-1?q?entlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_g?= + =?iso-8859-1?q?egen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") + + def test_long_received_header(self): + h = 'from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; Wed, 05 Mar 2003 18:10:18 -0700' + msg = Message() + msg['Received-1'] = Header(h, continuation_ws='\t') + msg['Received-2'] = h + self.assertEqual(msg.as_string(), """\ +Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by +\throthgar.la.mastaler.com (tmda-ofmipd) with ESMTP; +\tWed, 05 Mar 2003 18:10:18 -0700 +Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by + hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; + Wed, 05 Mar 2003 18:10:18 -0700 + +""") + + def test_string_headerinst_eq(self): + h = '<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner\'s message of "Thu, 6 Mar 2003 13:58:21 +0100")' + msg = Message() + msg['Received'] = Header(h, header_name='Received', + continuation_ws='\t') + msg['Received'] = h + self.ndiffAssertEqual(msg.as_string(), """\ +Received: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> +\t(David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100") +Received: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> + (David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100") + +""") + + def test_long_unbreakable_lines_with_continuation(self): + eq = self.ndiffAssertEqual + msg = Message() + t = """\ + iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 + locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp""" + msg['Face-1'] = t + msg['Face-2'] = Header(t, header_name='Face-2') + eq(msg.as_string(), """\ +Face-1: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 + locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp +Face-2: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 + locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp + +""") + + def test_another_long_multiline_header(self): + eq = self.ndiffAssertEqual + m = '''\ +Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905); + Wed, 16 Oct 2002 07:41:11 -0700''' + msg = email.message_from_string(m) + eq(msg.as_string(), '''\ +Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with + Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700 + +''') + + def test_long_lines_with_different_header(self): + eq = self.ndiffAssertEqual + h = """\ +List-Unsubscribe: , + """ + msg = Message() + msg['List'] = h + msg['List'] = Header(h, header_name='List') + eq(msg.as_string(), """\ +List: List-Unsubscribe: , + +List: List-Unsubscribe: , + + +""") + + + +# Test mangling of "From " lines in the body of a message +class TestFromMangling(unittest.TestCase): + def setUp(self): + self.msg = Message() + self.msg['From'] = 'aaa@bbb.org' + self.msg.set_payload("""\ +From the desk of A.A.A.: +Blah blah blah +""") + + def test_mangled_from(self): + s = StringIO() + g = Generator(s, mangle_from_=True) + g.flatten(self.msg) + self.assertEqual(s.getvalue(), """\ +From: aaa@bbb.org + +>From the desk of A.A.A.: +Blah blah blah +""") + + def test_dont_mangle_from(self): + s = StringIO() + g = Generator(s, mangle_from_=False) + g.flatten(self.msg) + self.assertEqual(s.getvalue(), """\ +From: aaa@bbb.org + +From the desk of A.A.A.: +Blah blah blah +""") + + def test_mangle_from_in_preamble_and_epilog(self): + s = StringIO() + g = Generator(s, mangle_from_=True) + msg = email.message_from_string(textwrap.dedent("""\ + From: foo@bar.com + Mime-Version: 1.0 + Content-Type: multipart/mixed; boundary=XXX + + From somewhere unknown + + --XXX + Content-Type: text/plain + + foo + + --XXX-- + + From somewhere unknowable + """)) + g.flatten(msg) + self.assertEqual(len([1 for x in s.getvalue().split('\n') + if x.startswith('>From ')]), 2) + + +# Test the basic MIMEAudio class +class TestMIMEAudio(unittest.TestCase): + def setUp(self): + # Make sure we pick up the audiotest.au that lives in email/test/data. + # In Python, there's an audiotest.au living in Lib/test but that isn't + # included in some binary distros that don't include the test + # package. The trailing empty string on the .join() is significant + # since findfile() will do a dirname(). + datadir = os.path.join(os.path.dirname(landmark), 'data', '') + fp = open(findfile('audiotest.au', datadir), 'rb') + try: + self._audiodata = fp.read() + finally: + fp.close() + self._au = MIMEAudio(self._audiodata) + + def test_guess_minor_type(self): + self.assertEqual(self._au.get_content_type(), 'audio/basic') + + def test_encoding(self): + payload = self._au.get_payload() + self.assertEqual(base64.decodestring(payload), self._audiodata) + + def test_checkSetMinor(self): + au = MIMEAudio(self._audiodata, 'fish') + self.assertEqual(au.get_content_type(), 'audio/fish') + + def test_add_header(self): + eq = self.assertEqual + self._au.add_header('Content-Disposition', 'attachment', + filename='audiotest.au') + eq(self._au['content-disposition'], + 'attachment; filename="audiotest.au"') + eq(self._au.get_params(header='content-disposition'), + [('attachment', ''), ('filename', 'audiotest.au')]) + eq(self._au.get_param('filename', header='content-disposition'), + 'audiotest.au') + missing = [] + eq(self._au.get_param('attachment', header='content-disposition'), '') + self.assertIs(self._au.get_param('foo', failobj=missing, + header='content-disposition'), missing) + # Try some missing stuff + self.assertIs(self._au.get_param('foobar', missing), missing) + self.assertIs(self._au.get_param('attachment', missing, + header='foobar'), missing) + + + +# Test the basic MIMEImage class +class TestMIMEImage(unittest.TestCase): + def setUp(self): + fp = openfile('PyBanner048.gif') + try: + self._imgdata = fp.read() + finally: + fp.close() + self._im = MIMEImage(self._imgdata) + + def test_guess_minor_type(self): + self.assertEqual(self._im.get_content_type(), 'image/gif') + + def test_encoding(self): + payload = self._im.get_payload() + self.assertEqual(base64.decodestring(payload), self._imgdata) + + def test_checkSetMinor(self): + im = MIMEImage(self._imgdata, 'fish') + self.assertEqual(im.get_content_type(), 'image/fish') + + def test_add_header(self): + eq = self.assertEqual + self._im.add_header('Content-Disposition', 'attachment', + filename='dingusfish.gif') + eq(self._im['content-disposition'], + 'attachment; filename="dingusfish.gif"') + eq(self._im.get_params(header='content-disposition'), + [('attachment', ''), ('filename', 'dingusfish.gif')]) + eq(self._im.get_param('filename', header='content-disposition'), + 'dingusfish.gif') + missing = [] + eq(self._im.get_param('attachment', header='content-disposition'), '') + self.assertIs(self._im.get_param('foo', failobj=missing, + header='content-disposition'), missing) + # Try some missing stuff + self.assertIs(self._im.get_param('foobar', missing), missing) + self.assertIs(self._im.get_param('attachment', missing, + header='foobar'), missing) + + + +# Test the basic MIMEText class +class TestMIMEText(unittest.TestCase): + def setUp(self): + self._msg = MIMEText('hello there') + + def test_types(self): + eq = self.assertEqual + eq(self._msg.get_content_type(), 'text/plain') + eq(self._msg.get_param('charset'), 'us-ascii') + missing = [] + self.assertIs(self._msg.get_param('foobar', missing), missing) + self.assertIs(self._msg.get_param('charset', missing, header='foobar'), + missing) + + def test_payload(self): + self.assertEqual(self._msg.get_payload(), 'hello there') + self.assertFalse(self._msg.is_multipart()) + + def test_charset(self): + eq = self.assertEqual + msg = MIMEText('hello there', _charset='us-ascii') + eq(msg.get_charset().input_charset, 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + + def test_7bit_unicode_input(self): + eq = self.assertEqual + msg = MIMEText(u'hello there', _charset='us-ascii') + eq(msg.get_charset().input_charset, 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + + def test_7bit_unicode_input_no_charset(self): + eq = self.assertEqual + msg = MIMEText(u'hello there') + eq(msg.get_charset(), 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + self.assertIn('hello there', msg.as_string()) + + def test_8bit_unicode_input(self): + teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' + eq = self.assertEqual + msg = MIMEText(teststr, _charset='utf-8') + eq(msg.get_charset().output_charset, 'utf-8') + eq(msg['content-type'], 'text/plain; charset="utf-8"') + eq(msg.get_payload(decode=True), teststr.encode('utf-8')) + + def test_8bit_unicode_input_no_charset(self): + teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' + self.assertRaises(UnicodeEncodeError, MIMEText, teststr) + + + +# Test complicated multipart/* messages +class TestMultipart(TestEmailBase): + def setUp(self): + fp = openfile('PyBanner048.gif') + try: + data = fp.read() + finally: + fp.close() + + container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY') + image = MIMEImage(data, name='dingusfish.gif') + image.add_header('content-disposition', 'attachment', + filename='dingusfish.gif') + intro = MIMEText('''\ +Hi there, + +This is the dingus fish. +''') + container.attach(intro) + container.attach(image) + container['From'] = 'Barry ' + container['To'] = 'Dingus Lovers ' + container['Subject'] = 'Here is your dingus fish' + + now = 987809702.54848599 + timetuple = time.localtime(now) + if timetuple[-1] == 0: + tzsecs = time.timezone + else: + tzsecs = time.altzone + if tzsecs > 0: + sign = '-' + else: + sign = '+' + tzoffset = ' %s%04d' % (sign, tzsecs // 36) + container['Date'] = time.strftime( + '%a, %d %b %Y %H:%M:%S', + time.localtime(now)) + tzoffset + self._msg = container + self._im = image + self._txt = intro + + def test_hierarchy(self): + # convenience + eq = self.assertEqual + raises = self.assertRaises + # tests + m = self._msg + self.assertTrue(m.is_multipart()) + eq(m.get_content_type(), 'multipart/mixed') + eq(len(m.get_payload()), 2) + raises(IndexError, m.get_payload, 2) + m0 = m.get_payload(0) + m1 = m.get_payload(1) + self.assertIs(m0, self._txt) + self.assertIs(m1, self._im) + eq(m.get_payload(), [m0, m1]) + self.assertFalse(m0.is_multipart()) + self.assertFalse(m1.is_multipart()) + + def test_empty_multipart_idempotent(self): + text = """\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + + +--BOUNDARY + + +--BOUNDARY-- +""" + msg = Parser().parsestr(text) + self.ndiffAssertEqual(text, msg.as_string()) + + def test_no_parts_in_a_multipart_with_none_epilogue(self): + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.set_boundary('BOUNDARY') + self.ndiffAssertEqual(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY + +--BOUNDARY-- +''') + + def test_no_parts_in_a_multipart_with_empty_epilogue(self): + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = '' + outer.epilogue = '' + outer.set_boundary('BOUNDARY') + self.ndiffAssertEqual(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + + +--BOUNDARY + +--BOUNDARY-- +''') + + def test_one_part_in_a_multipart(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.set_boundary('BOUNDARY') + msg = MIMEText('hello world') + outer.attach(msg) + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- +''') + + def test_seq_parts_in_a_multipart_with_empty_preamble(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = '' + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- +''') + + + def test_seq_parts_in_a_multipart_with_none_preamble(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = None + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- +''') + + + def test_seq_parts_in_a_multipart_with_none_epilogue(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.epilogue = None + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- +''') + + + def test_seq_parts_in_a_multipart_with_empty_epilogue(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.epilogue = '' + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- +''') + + + def test_seq_parts_in_a_multipart_with_nl_epilogue(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.epilogue = '\n' + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- + +''') + + def test_message_external_body(self): + eq = self.assertEqual + msg = self._msgobj('msg_36.txt') + eq(len(msg.get_payload()), 2) + msg1 = msg.get_payload(1) + eq(msg1.get_content_type(), 'multipart/alternative') + eq(len(msg1.get_payload()), 2) + for subpart in msg1.get_payload(): + eq(subpart.get_content_type(), 'message/external-body') + eq(len(subpart.get_payload()), 1) + subsubpart = subpart.get_payload(0) + eq(subsubpart.get_content_type(), 'text/plain') + + def test_double_boundary(self): + # msg_37.txt is a multipart that contains two dash-boundary's in a + # row. Our interpretation of RFC 2046 calls for ignoring the second + # and subsequent boundaries. + msg = self._msgobj('msg_37.txt') + self.assertEqual(len(msg.get_payload()), 3) + + def test_nested_inner_contains_outer_boundary(self): + eq = self.ndiffAssertEqual + # msg_38.txt has an inner part that contains outer boundaries. My + # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say + # these are illegal and should be interpreted as unterminated inner + # parts. + msg = self._msgobj('msg_38.txt') + sfp = StringIO() + Iterators._structure(msg, sfp) + eq(sfp.getvalue(), """\ +multipart/mixed + multipart/mixed + multipart/alternative + text/plain + text/plain + text/plain + text/plain +""") + + def test_nested_with_same_boundary(self): + eq = self.ndiffAssertEqual + # msg 39.txt is similarly evil in that it's got inner parts that use + # the same boundary as outer parts. Again, I believe the way this is + # parsed is closest to the spirit of RFC 2046 + msg = self._msgobj('msg_39.txt') + sfp = StringIO() + Iterators._structure(msg, sfp) + eq(sfp.getvalue(), """\ +multipart/mixed + multipart/mixed + multipart/alternative + application/octet-stream + application/octet-stream + text/plain +""") + + def test_boundary_in_non_multipart(self): + msg = self._msgobj('msg_40.txt') + self.assertEqual(msg.as_string(), '''\ +MIME-Version: 1.0 +Content-Type: text/html; boundary="--961284236552522269" + +----961284236552522269 +Content-Type: text/html; +Content-Transfer-Encoding: 7Bit + + + +----961284236552522269-- +''') + + def test_boundary_with_leading_space(self): + eq = self.assertEqual + msg = email.message_from_string('''\ +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary=" XXXX" + +-- XXXX +Content-Type: text/plain + + +-- XXXX +Content-Type: text/plain + +-- XXXX-- +''') + self.assertTrue(msg.is_multipart()) + eq(msg.get_boundary(), ' XXXX') + eq(len(msg.get_payload()), 2) + + def test_boundary_without_trailing_newline(self): + m = Parser().parsestr("""\ +Content-Type: multipart/mixed; boundary="===============0012394164==" +MIME-Version: 1.0 + +--===============0012394164== +Content-Type: image/file1.jpg +MIME-Version: 1.0 +Content-Transfer-Encoding: base64 + +YXNkZg== +--===============0012394164==--""") + self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==') + + + +# Test some badly formatted messages +class TestNonConformant(TestEmailBase): + def test_parse_missing_minor_type(self): + eq = self.assertEqual + msg = self._msgobj('msg_14.txt') + eq(msg.get_content_type(), 'text/plain') + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') + + def test_same_boundary_inner_outer(self): + msg = self._msgobj('msg_15.txt') + # XXX We can probably eventually do better + inner = msg.get_payload(0) + self.assertTrue(hasattr(inner, 'defects')) + self.assertEqual(len(inner.defects), 1) + self.assertIsInstance(inner.defects[0], + Errors.StartBoundaryNotFoundDefect) + + def test_multipart_no_boundary(self): + msg = self._msgobj('msg_25.txt') + self.assertIsInstance(msg.get_payload(), str) + self.assertEqual(len(msg.defects), 2) + self.assertIsInstance(msg.defects[0], + Errors.NoBoundaryInMultipartDefect) + self.assertIsInstance(msg.defects[1], + Errors.MultipartInvariantViolationDefect) + + def test_invalid_content_type(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + msg = Message() + # RFC 2045, $5.2 says invalid yields text/plain + msg['Content-Type'] = 'text' + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') + eq(msg.get_content_type(), 'text/plain') + # Clear the old value and try something /really/ invalid + del msg['content-type'] + msg['Content-Type'] = 'foo' + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') + eq(msg.get_content_type(), 'text/plain') + # Still, make sure that the message is idempotently generated + s = StringIO() + g = Generator(s) + g.flatten(msg) + neq(s.getvalue(), 'Content-Type: foo\n\n') + + def test_no_start_boundary(self): + eq = self.ndiffAssertEqual + msg = self._msgobj('msg_31.txt') + eq(msg.get_payload(), """\ +--BOUNDARY +Content-Type: text/plain + +message 1 + +--BOUNDARY +Content-Type: text/plain + +message 2 + +--BOUNDARY-- +""") + + def test_no_separating_blank_line(self): + eq = self.ndiffAssertEqual + msg = self._msgobj('msg_35.txt') + eq(msg.as_string(), """\ +From: aperson@dom.ain +To: bperson@dom.ain +Subject: here's something interesting + +counter to RFC 2822, there's no separating newline here +""") + + def test_lying_multipart(self): + msg = self._msgobj('msg_41.txt') + self.assertTrue(hasattr(msg, 'defects')) + self.assertEqual(len(msg.defects), 2) + self.assertIsInstance(msg.defects[0], + Errors.NoBoundaryInMultipartDefect) + self.assertIsInstance(msg.defects[1], + Errors.MultipartInvariantViolationDefect) + + def test_missing_start_boundary(self): + outer = self._msgobj('msg_42.txt') + # The message structure is: + # + # multipart/mixed + # text/plain + # message/rfc822 + # multipart/mixed [*] + # + # [*] This message is missing its start boundary + bad = outer.get_payload(1).get_payload(0) + self.assertEqual(len(bad.defects), 1) + self.assertIsInstance(bad.defects[0], + Errors.StartBoundaryNotFoundDefect) + + def test_first_line_is_continuation_header(self): + eq = self.assertEqual + m = ' Line 1\nLine 2\nLine 3' + msg = email.message_from_string(m) + eq(msg.keys(), []) + eq(msg.get_payload(), 'Line 2\nLine 3') + eq(len(msg.defects), 1) + self.assertIsInstance(msg.defects[0], + Errors.FirstHeaderLineIsContinuationDefect) + eq(msg.defects[0].line, ' Line 1\n') + + + + +# Test RFC 2047 header encoding and decoding +class TestRFC2047(unittest.TestCase): + def test_rfc2047_multiline(self): + eq = self.assertEqual + s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz + foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" + dh = decode_header(s) + eq(dh, [ + ('Re:', None), + ('r\x8aksm\x9arg\x8cs', 'mac-iceland'), + ('baz foo bar', None), + ('r\x8aksm\x9arg\x8cs', 'mac-iceland')]) + eq(str(make_header(dh)), + """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar + =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""") + + def test_whitespace_eater_unicode(self): + eq = self.assertEqual + s = '=?ISO-8859-1?Q?Andr=E9?= Pirard ' + dh = decode_header(s) + eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard ', None)]) + hu = unicode(make_header(dh)).encode('latin-1') + eq(hu, 'Andr\xe9 Pirard ') + + def test_whitespace_eater_unicode_2(self): + eq = self.assertEqual + s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=' + dh = decode_header(s) + eq(dh, [('The', None), ('quick brown fox', 'iso-8859-1'), + ('jumped over the', None), ('lazy dog', 'iso-8859-1')]) + hu = make_header(dh).__unicode__() + eq(hu, u'The quick brown fox jumped over the lazy dog') + + def test_rfc2047_without_whitespace(self): + s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' + dh = decode_header(s) + self.assertEqual(dh, [(s, None)]) + + def test_rfc2047_with_whitespace(self): + s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' + dh = decode_header(s) + self.assertEqual(dh, [('Sm', None), ('\xf6', 'iso-8859-1'), + ('rg', None), ('\xe5', 'iso-8859-1'), + ('sbord', None)]) + + def test_rfc2047_B_bad_padding(self): + s = '=?iso-8859-1?B?%s?=' + data = [ # only test complete bytes + ('dm==', 'v'), ('dm=', 'v'), ('dm', 'v'), + ('dmk=', 'vi'), ('dmk', 'vi') + ] + for q, a in data: + dh = decode_header(s % q) + self.assertEqual(dh, [(a, 'iso-8859-1')]) + + def test_rfc2047_Q_invalid_digits(self): + # issue 10004. + s = '=?iso-8659-1?Q?andr=e9=zz?=' + self.assertEqual(decode_header(s), + [(b'andr\xe9=zz', 'iso-8659-1')]) + + +# Test the MIMEMessage class +class TestMIMEMessage(TestEmailBase): + def setUp(self): + fp = openfile('msg_11.txt') + try: + self._text = fp.read() + finally: + fp.close() + + def test_type_error(self): + self.assertRaises(TypeError, MIMEMessage, 'a plain string') + + def test_valid_argument(self): + eq = self.assertEqual + subject = 'A sub-message' + m = Message() + m['Subject'] = subject + r = MIMEMessage(m) + eq(r.get_content_type(), 'message/rfc822') + payload = r.get_payload() + self.assertIsInstance(payload, list) + eq(len(payload), 1) + subpart = payload[0] + self.assertIs(subpart, m) + eq(subpart['subject'], subject) + + def test_bad_multipart(self): + eq = self.assertEqual + msg1 = Message() + msg1['Subject'] = 'subpart 1' + msg2 = Message() + msg2['Subject'] = 'subpart 2' + r = MIMEMessage(msg1) + self.assertRaises(Errors.MultipartConversionError, r.attach, msg2) + + def test_generate(self): + # First craft the message to be encapsulated + m = Message() + m['Subject'] = 'An enclosed message' + m.set_payload('Here is the body of the message.\n') + r = MIMEMessage(m) + r['Subject'] = 'The enclosing message' + s = StringIO() + g = Generator(s) + g.flatten(r) + self.assertEqual(s.getvalue(), """\ +Content-Type: message/rfc822 +MIME-Version: 1.0 +Subject: The enclosing message + +Subject: An enclosed message + +Here is the body of the message. +""") + + def test_parse_message_rfc822(self): + eq = self.assertEqual + msg = self._msgobj('msg_11.txt') + eq(msg.get_content_type(), 'message/rfc822') + payload = msg.get_payload() + self.assertIsInstance(payload, list) + eq(len(payload), 1) + submsg = payload[0] + self.assertIsInstance(submsg, Message) + eq(submsg['subject'], 'An enclosed message') + eq(submsg.get_payload(), 'Here is the body of the message.\n') + + def test_dsn(self): + eq = self.assertEqual + # msg 16 is a Delivery Status Notification, see RFC 1894 + msg = self._msgobj('msg_16.txt') + eq(msg.get_content_type(), 'multipart/report') + self.assertTrue(msg.is_multipart()) + eq(len(msg.get_payload()), 3) + # Subpart 1 is a text/plain, human readable section + subpart = msg.get_payload(0) + eq(subpart.get_content_type(), 'text/plain') + eq(subpart.get_payload(), """\ +This report relates to a message you sent with the following header fields: + + Message-id: <002001c144a6$8752e060$56104586@oxy.edu> + Date: Sun, 23 Sep 2001 20:10:55 -0700 + From: "Ian T. Henry" + To: SoCal Raves + Subject: [scr] yeah for Ians!! + +Your message cannot be delivered to the following recipients: + + Recipient address: jangel1@cougar.noc.ucla.edu + Reason: recipient reached disk quota + +""") + # Subpart 2 contains the machine parsable DSN information. It + # consists of two blocks of headers, represented by two nested Message + # objects. + subpart = msg.get_payload(1) + eq(subpart.get_content_type(), 'message/delivery-status') + eq(len(subpart.get_payload()), 2) + # message/delivery-status should treat each block as a bunch of + # headers, i.e. a bunch of Message objects. + dsn1 = subpart.get_payload(0) + self.assertIsInstance(dsn1, Message) + eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu') + eq(dsn1.get_param('dns', header='reporting-mta'), '') + # Try a missing one + eq(dsn1.get_param('nsd', header='reporting-mta'), None) + dsn2 = subpart.get_payload(1) + self.assertIsInstance(dsn2, Message) + eq(dsn2['action'], 'failed') + eq(dsn2.get_params(header='original-recipient'), + [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')]) + eq(dsn2.get_param('rfc822', header='final-recipient'), '') + # Subpart 3 is the original message + subpart = msg.get_payload(2) + eq(subpart.get_content_type(), 'message/rfc822') + payload = subpart.get_payload() + self.assertIsInstance(payload, list) + eq(len(payload), 1) + subsubpart = payload[0] + self.assertIsInstance(subsubpart, Message) + eq(subsubpart.get_content_type(), 'text/plain') + eq(subsubpart['message-id'], + '<002001c144a6$8752e060$56104586@oxy.edu>') + + def test_epilogue(self): + eq = self.ndiffAssertEqual + fp = openfile('msg_21.txt') + try: + text = fp.read() + finally: + fp.close() + msg = Message() + msg['From'] = 'aperson@dom.ain' + msg['To'] = 'bperson@dom.ain' + msg['Subject'] = 'Test' + msg.preamble = 'MIME message' + msg.epilogue = 'End of MIME message\n' + msg1 = MIMEText('One') + msg2 = MIMEText('Two') + msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') + msg.attach(msg1) + msg.attach(msg2) + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), text) + + def test_no_nl_preamble(self): + eq = self.ndiffAssertEqual + msg = Message() + msg['From'] = 'aperson@dom.ain' + msg['To'] = 'bperson@dom.ain' + msg['Subject'] = 'Test' + msg.preamble = 'MIME message' + msg.epilogue = '' + msg1 = MIMEText('One') + msg2 = MIMEText('Two') + msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') + msg.attach(msg1) + msg.attach(msg2) + eq(msg.as_string(), """\ +From: aperson@dom.ain +To: bperson@dom.ain +Subject: Test +Content-Type: multipart/mixed; boundary="BOUNDARY" + +MIME message +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +One +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +Two +--BOUNDARY-- +""") + + def test_default_type(self): + eq = self.assertEqual + fp = openfile('msg_30.txt') + try: + msg = email.message_from_file(fp) + finally: + fp.close() + container1 = msg.get_payload(0) + eq(container1.get_default_type(), 'message/rfc822') + eq(container1.get_content_type(), 'message/rfc822') + container2 = msg.get_payload(1) + eq(container2.get_default_type(), 'message/rfc822') + eq(container2.get_content_type(), 'message/rfc822') + container1a = container1.get_payload(0) + eq(container1a.get_default_type(), 'text/plain') + eq(container1a.get_content_type(), 'text/plain') + container2a = container2.get_payload(0) + eq(container2a.get_default_type(), 'text/plain') + eq(container2a.get_content_type(), 'text/plain') + + def test_default_type_with_explicit_container_type(self): + eq = self.assertEqual + fp = openfile('msg_28.txt') + try: + msg = email.message_from_file(fp) + finally: + fp.close() + container1 = msg.get_payload(0) + eq(container1.get_default_type(), 'message/rfc822') + eq(container1.get_content_type(), 'message/rfc822') + container2 = msg.get_payload(1) + eq(container2.get_default_type(), 'message/rfc822') + eq(container2.get_content_type(), 'message/rfc822') + container1a = container1.get_payload(0) + eq(container1a.get_default_type(), 'text/plain') + eq(container1a.get_content_type(), 'text/plain') + container2a = container2.get_payload(0) + eq(container2a.get_default_type(), 'text/plain') + eq(container2a.get_content_type(), 'text/plain') + + def test_default_type_non_parsed(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + # Set up container + container = MIMEMultipart('digest', 'BOUNDARY') + container.epilogue = '' + # Set up subparts + subpart1a = MIMEText('message 1\n') + subpart2a = MIMEText('message 2\n') + subpart1 = MIMEMessage(subpart1a) + subpart2 = MIMEMessage(subpart2a) + container.attach(subpart1) + container.attach(subpart2) + eq(subpart1.get_content_type(), 'message/rfc822') + eq(subpart1.get_default_type(), 'message/rfc822') + eq(subpart2.get_content_type(), 'message/rfc822') + eq(subpart2.get_default_type(), 'message/rfc822') + neq(container.as_string(0), '''\ +Content-Type: multipart/digest; boundary="BOUNDARY" +MIME-Version: 1.0 + +--BOUNDARY +Content-Type: message/rfc822 +MIME-Version: 1.0 + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 1 + +--BOUNDARY +Content-Type: message/rfc822 +MIME-Version: 1.0 + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 2 + +--BOUNDARY-- +''') + del subpart1['content-type'] + del subpart1['mime-version'] + del subpart2['content-type'] + del subpart2['mime-version'] + eq(subpart1.get_content_type(), 'message/rfc822') + eq(subpart1.get_default_type(), 'message/rfc822') + eq(subpart2.get_content_type(), 'message/rfc822') + eq(subpart2.get_default_type(), 'message/rfc822') + neq(container.as_string(0), '''\ +Content-Type: multipart/digest; boundary="BOUNDARY" +MIME-Version: 1.0 + +--BOUNDARY + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 1 + +--BOUNDARY + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 2 + +--BOUNDARY-- +''') + + def test_mime_attachments_in_constructor(self): + eq = self.assertEqual + text1 = MIMEText('') + text2 = MIMEText('') + msg = MIMEMultipart(_subparts=(text1, text2)) + eq(len(msg.get_payload()), 2) + eq(msg.get_payload(0), text1) + eq(msg.get_payload(1), text2) + + def test_default_multipart_constructor(self): + msg = MIMEMultipart() + self.assertTrue(msg.is_multipart()) + + +# A general test of parser->model->generator idempotency. IOW, read a message +# in, parse it into a message object tree, then without touching the tree, +# regenerate the plain text. The original text and the transformed text +# should be identical. Note: that we ignore the Unix-From since that may +# contain a changed date. +class TestIdempotent(TestEmailBase): + def _msgobj(self, filename): + fp = openfile(filename) + try: + data = fp.read() + finally: + fp.close() + msg = email.message_from_string(data) + return msg, data + + def _idempotent(self, msg, text): + eq = self.ndiffAssertEqual + s = StringIO() + g = Generator(s, maxheaderlen=0) + g.flatten(msg) + eq(text, s.getvalue()) + + def test_parse_text_message(self): + eq = self.assertEqual + msg, text = self._msgobj('msg_01.txt') + eq(msg.get_content_type(), 'text/plain') + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') + eq(msg.get_params()[1], ('charset', 'us-ascii')) + eq(msg.get_param('charset'), 'us-ascii') + eq(msg.preamble, None) + eq(msg.epilogue, None) + self._idempotent(msg, text) + + def test_parse_untyped_message(self): + eq = self.assertEqual + msg, text = self._msgobj('msg_03.txt') + eq(msg.get_content_type(), 'text/plain') + eq(msg.get_params(), None) + eq(msg.get_param('charset'), None) + self._idempotent(msg, text) + + def test_simple_multipart(self): + msg, text = self._msgobj('msg_04.txt') + self._idempotent(msg, text) + + def test_MIME_digest(self): + msg, text = self._msgobj('msg_02.txt') + self._idempotent(msg, text) + + def test_long_header(self): + msg, text = self._msgobj('msg_27.txt') + self._idempotent(msg, text) + + def test_MIME_digest_with_part_headers(self): + msg, text = self._msgobj('msg_28.txt') + self._idempotent(msg, text) + + def test_mixed_with_image(self): + msg, text = self._msgobj('msg_06.txt') + self._idempotent(msg, text) + + def test_multipart_report(self): + msg, text = self._msgobj('msg_05.txt') + self._idempotent(msg, text) + + def test_dsn(self): + msg, text = self._msgobj('msg_16.txt') + self._idempotent(msg, text) + + def test_preamble_epilogue(self): + msg, text = self._msgobj('msg_21.txt') + self._idempotent(msg, text) + + def test_multipart_one_part(self): + msg, text = self._msgobj('msg_23.txt') + self._idempotent(msg, text) + + def test_multipart_no_parts(self): + msg, text = self._msgobj('msg_24.txt') + self._idempotent(msg, text) + + def test_no_start_boundary(self): + msg, text = self._msgobj('msg_31.txt') + self._idempotent(msg, text) + + def test_rfc2231_charset(self): + msg, text = self._msgobj('msg_32.txt') + self._idempotent(msg, text) + + def test_more_rfc2231_parameters(self): + msg, text = self._msgobj('msg_33.txt') + self._idempotent(msg, text) + + def test_text_plain_in_a_multipart_digest(self): + msg, text = self._msgobj('msg_34.txt') + self._idempotent(msg, text) + + def test_nested_multipart_mixeds(self): + msg, text = self._msgobj('msg_12a.txt') + self._idempotent(msg, text) + + def test_message_external_body_idempotent(self): + msg, text = self._msgobj('msg_36.txt') + self._idempotent(msg, text) + + def test_content_type(self): + eq = self.assertEqual + # Get a message object and reset the seek pointer for other tests + msg, text = self._msgobj('msg_05.txt') + eq(msg.get_content_type(), 'multipart/report') + # Test the Content-Type: parameters + params = {} + for pk, pv in msg.get_params(): + params[pk] = pv + eq(params['report-type'], 'delivery-status') + eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com') + eq(msg.preamble, 'This is a MIME-encapsulated message.\n') + eq(msg.epilogue, '\n') + eq(len(msg.get_payload()), 3) + # Make sure the subparts are what we expect + msg1 = msg.get_payload(0) + eq(msg1.get_content_type(), 'text/plain') + eq(msg1.get_payload(), 'Yadda yadda yadda\n') + msg2 = msg.get_payload(1) + eq(msg2.get_content_type(), 'text/plain') + eq(msg2.get_payload(), 'Yadda yadda yadda\n') + msg3 = msg.get_payload(2) + eq(msg3.get_content_type(), 'message/rfc822') + self.assertIsInstance(msg3, Message) + payload = msg3.get_payload() + self.assertIsInstance(payload, list) + eq(len(payload), 1) + msg4 = payload[0] + self.assertIsInstance(msg4, Message) + eq(msg4.get_payload(), 'Yadda yadda yadda\n') + + def test_parser(self): + eq = self.assertEqual + msg, text = self._msgobj('msg_06.txt') + # Check some of the outer headers + eq(msg.get_content_type(), 'message/rfc822') + # Make sure the payload is a list of exactly one sub-Message, and that + # that submessage has a type of text/plain + payload = msg.get_payload() + self.assertIsInstance(payload, list) + eq(len(payload), 1) + msg1 = payload[0] + self.assertIsInstance(msg1, Message) + eq(msg1.get_content_type(), 'text/plain') + self.assertIsInstance(msg1.get_payload(), str) + eq(msg1.get_payload(), '\n') + + + +# Test various other bits of the package's functionality +class TestMiscellaneous(TestEmailBase): + def test_message_from_string(self): + fp = openfile('msg_01.txt') + try: + text = fp.read() + finally: + fp.close() + msg = email.message_from_string(text) + s = StringIO() + # Don't wrap/continue long headers since we're trying to test + # idempotency. + g = Generator(s, maxheaderlen=0) + g.flatten(msg) + self.assertEqual(text, s.getvalue()) + + def test_message_from_file(self): + fp = openfile('msg_01.txt') + try: + text = fp.read() + fp.seek(0) + msg = email.message_from_file(fp) + s = StringIO() + # Don't wrap/continue long headers since we're trying to test + # idempotency. + g = Generator(s, maxheaderlen=0) + g.flatten(msg) + self.assertEqual(text, s.getvalue()) + finally: + fp.close() + + def test_message_from_string_with_class(self): + fp = openfile('msg_01.txt') + try: + text = fp.read() + finally: + fp.close() + # Create a subclass + class MyMessage(Message): + pass + + msg = email.message_from_string(text, MyMessage) + self.assertIsInstance(msg, MyMessage) + # Try something more complicated + fp = openfile('msg_02.txt') + try: + text = fp.read() + finally: + fp.close() + msg = email.message_from_string(text, MyMessage) + for subpart in msg.walk(): + self.assertIsInstance(subpart, MyMessage) + + def test_message_from_file_with_class(self): + # Create a subclass + class MyMessage(Message): + pass + + fp = openfile('msg_01.txt') + try: + msg = email.message_from_file(fp, MyMessage) + finally: + fp.close() + self.assertIsInstance(msg, MyMessage) + # Try something more complicated + fp = openfile('msg_02.txt') + try: + msg = email.message_from_file(fp, MyMessage) + finally: + fp.close() + for subpart in msg.walk(): + self.assertIsInstance(subpart, MyMessage) + + def test__all__(self): + module = __import__('email') + all = module.__all__ + all.sort() + self.assertEqual(all, [ + # Old names + 'Charset', 'Encoders', 'Errors', 'Generator', + 'Header', 'Iterators', 'MIMEAudio', 'MIMEBase', + 'MIMEImage', 'MIMEMessage', 'MIMEMultipart', + 'MIMENonMultipart', 'MIMEText', 'Message', + 'Parser', 'Utils', 'base64MIME', + # new names + 'base64mime', 'charset', 'encoders', 'errors', 'generator', + 'header', 'iterators', 'message', 'message_from_file', + 'message_from_string', 'mime', 'parser', + 'quopriMIME', 'quoprimime', 'utils', + ]) + + def test_formatdate(self): + now = time.time() + self.assertEqual(Utils.parsedate(Utils.formatdate(now))[:6], + time.gmtime(now)[:6]) + + def test_formatdate_localtime(self): + now = time.time() + self.assertEqual( + Utils.parsedate(Utils.formatdate(now, localtime=True))[:6], + time.localtime(now)[:6]) + + def test_formatdate_usegmt(self): + now = time.time() + self.assertEqual( + Utils.formatdate(now, localtime=False), + time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now))) + self.assertEqual( + Utils.formatdate(now, localtime=False, usegmt=True), + time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now))) + + def test_parsedate_none(self): + self.assertEqual(Utils.parsedate(''), None) + + def test_parsedate_compact(self): + # The FWS after the comma is optional + self.assertEqual(Utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'), + Utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')) + + def test_parsedate_no_dayofweek(self): + eq = self.assertEqual + eq(Utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'), + (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)) + + def test_parsedate_compact_no_dayofweek(self): + eq = self.assertEqual + eq(Utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), + (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) + + def test_parsedate_acceptable_to_time_functions(self): + eq = self.assertEqual + timetup = Utils.parsedate('5 Feb 2003 13:47:26 -0800') + t = int(time.mktime(timetup)) + eq(time.localtime(t)[:6], timetup[:6]) + eq(int(time.strftime('%Y', timetup)), 2003) + timetup = Utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') + t = int(time.mktime(timetup[:9])) + eq(time.localtime(t)[:6], timetup[:6]) + eq(int(time.strftime('%Y', timetup[:9])), 2003) + + def test_mktime_tz(self): + self.assertEqual(Utils.mktime_tz((1970, 1, 1, 0, 0, 0, + -1, -1, -1, 0)), 0) + self.assertEqual(Utils.mktime_tz((1970, 1, 1, 0, 0, 0, + -1, -1, -1, 1234)), -1234) + + def test_parsedate_y2k(self): + """Test for parsing a date with a two-digit year. + + Parsing a date with a two-digit year should return the correct + four-digit year. RFC822 allows two-digit years, but RFC2822 (which + obsoletes RFC822) requires four-digit years. + + """ + self.assertEqual(Utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), + Utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')) + self.assertEqual(Utils.parsedate_tz('25 Feb 71 13:47:26 -0800'), + Utils.parsedate_tz('25 Feb 1971 13:47:26 -0800')) + + def test_parseaddr_empty(self): + self.assertEqual(Utils.parseaddr('<>'), ('', '')) + self.assertEqual(Utils.formataddr(Utils.parseaddr('<>')), '') + + def test_noquote_dump(self): + self.assertEqual( + Utils.formataddr(('A Silly Person', 'person@dom.ain')), + 'A Silly Person ') + + def test_escape_dump(self): + self.assertEqual( + Utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), + r'"A \(Very\) Silly Person" ') + a = r'A \(Special\) Person' + b = 'person@dom.ain' + self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b)) + + def test_escape_backslashes(self): + self.assertEqual( + Utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')), + r'"Arthur \\Backslash\\ Foobar" ') + a = r'Arthur \Backslash\ Foobar' + b = 'person@dom.ain' + self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b)) + + def test_name_with_dot(self): + x = 'John X. Doe ' + y = '"John X. Doe" ' + a, b = ('John X. Doe', 'jxd@example.com') + self.assertEqual(Utils.parseaddr(x), (a, b)) + self.assertEqual(Utils.parseaddr(y), (a, b)) + # formataddr() quotes the name if there's a dot in it + self.assertEqual(Utils.formataddr((a, b)), y) + + def test_parseaddr_preserves_quoted_pairs_in_addresses(self): + # issue 10005. Note that in the third test the second pair of + # backslashes is not actually a quoted pair because it is not inside a + # comment or quoted string: the address being parsed has a quoted + # string containing a quoted backslash, followed by 'example' and two + # backslashes, followed by another quoted string containing a space and + # the word 'example'. parseaddr copies those two backslashes + # literally. Per rfc5322 this is not technically correct since a \ may + # not appear in an address outside of a quoted string. It is probably + # a sensible Postel interpretation, though. + eq = self.assertEqual + eq(Utils.parseaddr('""example" example"@example.com'), + ('', '""example" example"@example.com')) + eq(Utils.parseaddr('"\\"example\\" example"@example.com'), + ('', '"\\"example\\" example"@example.com')) + eq(Utils.parseaddr('"\\\\"example\\\\" example"@example.com'), + ('', '"\\\\"example\\\\" example"@example.com')) + + def test_multiline_from_comment(self): + x = """\ +Foo +\tBar """ + self.assertEqual(Utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) + + def test_quote_dump(self): + self.assertEqual( + Utils.formataddr(('A Silly; Person', 'person@dom.ain')), + r'"A Silly; Person" ') + + def test_fix_eols(self): + eq = self.assertEqual + eq(Utils.fix_eols('hello'), 'hello') + eq(Utils.fix_eols('hello\n'), 'hello\r\n') + eq(Utils.fix_eols('hello\r'), 'hello\r\n') + eq(Utils.fix_eols('hello\r\n'), 'hello\r\n') + eq(Utils.fix_eols('hello\n\r'), 'hello\r\n\r\n') + + def test_charset_richcomparisons(self): + eq = self.assertEqual + ne = self.assertNotEqual + cset1 = Charset() + cset2 = Charset() + eq(cset1, 'us-ascii') + eq(cset1, 'US-ASCII') + eq(cset1, 'Us-AsCiI') + eq('us-ascii', cset1) + eq('US-ASCII', cset1) + eq('Us-AsCiI', cset1) + ne(cset1, 'usascii') + ne(cset1, 'USASCII') + ne(cset1, 'UsAsCiI') + ne('usascii', cset1) + ne('USASCII', cset1) + ne('UsAsCiI', cset1) + eq(cset1, cset2) + eq(cset2, cset1) + + def test_getaddresses(self): + eq = self.assertEqual + eq(Utils.getaddresses(['aperson@dom.ain (Al Person)', + 'Bud Person ']), + [('Al Person', 'aperson@dom.ain'), + ('Bud Person', 'bperson@dom.ain')]) + + def test_getaddresses_nasty(self): + eq = self.assertEqual + eq(Utils.getaddresses(['foo: ;']), [('', '')]) + eq(Utils.getaddresses( + ['[]*-- =~$']), + [('', ''), ('', ''), ('', '*--')]) + eq(Utils.getaddresses( + ['foo: ;', '"Jason R. Mastaler" ']), + [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) + + def test_getaddresses_embedded_comment(self): + """Test proper handling of a nested comment""" + eq = self.assertEqual + addrs = Utils.getaddresses(['User ((nested comment)) ']) + eq(addrs[0][1], 'foo@bar.com') + + def test_make_msgid_collisions(self): + # Test make_msgid uniqueness, even with multiple threads + class MsgidsThread(Thread): + def run(self): + # generate msgids for 3 seconds + self.msgids = [] + append = self.msgids.append + make_msgid = Utils.make_msgid + clock = time.time + tfin = clock() + 3.0 + while clock() < tfin: + append(make_msgid()) + + threads = [MsgidsThread() for i in range(5)] + with start_threads(threads): + pass + all_ids = sum([t.msgids for t in threads], []) + self.assertEqual(len(set(all_ids)), len(all_ids)) + + def test_utils_quote_unquote(self): + eq = self.assertEqual + msg = Message() + msg.add_header('content-disposition', 'attachment', + filename='foo\\wacky"name') + eq(msg.get_filename(), 'foo\\wacky"name') + + def test_get_body_encoding_with_bogus_charset(self): + charset = Charset('not a charset') + self.assertEqual(charset.get_body_encoding(), 'base64') + + def test_get_body_encoding_with_uppercase_charset(self): + eq = self.assertEqual + msg = Message() + msg['Content-Type'] = 'text/plain; charset=UTF-8' + eq(msg['content-type'], 'text/plain; charset=UTF-8') + charsets = msg.get_charsets() + eq(len(charsets), 1) + eq(charsets[0], 'utf-8') + charset = Charset(charsets[0]) + eq(charset.get_body_encoding(), 'base64') + msg.set_payload('hello world', charset=charset) + eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') + eq(msg.get_payload(decode=True), 'hello world') + eq(msg['content-transfer-encoding'], 'base64') + # Try another one + msg = Message() + msg['Content-Type'] = 'text/plain; charset="US-ASCII"' + charsets = msg.get_charsets() + eq(len(charsets), 1) + eq(charsets[0], 'us-ascii') + charset = Charset(charsets[0]) + eq(charset.get_body_encoding(), Encoders.encode_7or8bit) + msg.set_payload('hello world', charset=charset) + eq(msg.get_payload(), 'hello world') + eq(msg['content-transfer-encoding'], '7bit') + + def test_charsets_case_insensitive(self): + lc = Charset('us-ascii') + uc = Charset('US-ASCII') + self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding()) + + def test_partial_falls_inside_message_delivery_status(self): + eq = self.ndiffAssertEqual + # The Parser interface provides chunks of data to FeedParser in 8192 + # byte gulps. SF bug #1076485 found one of those chunks inside + # message/delivery-status header block, which triggered an + # unreadline() of NeedMoreData. + msg = self._msgobj('msg_43.txt') + sfp = StringIO() + Iterators._structure(msg, sfp) + eq(sfp.getvalue(), """\ +multipart/report + text/plain + message/delivery-status + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/rfc822-headers +""") + + + +# Test the iterator/generators +class TestIterators(TestEmailBase): + def test_body_line_iterator(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + # First a simple non-multipart message + msg = self._msgobj('msg_01.txt') + it = Iterators.body_line_iterator(msg) + lines = list(it) + eq(len(lines), 6) + neq(EMPTYSTRING.join(lines), msg.get_payload()) + # Now a more complicated multipart + msg = self._msgobj('msg_02.txt') + it = Iterators.body_line_iterator(msg) + lines = list(it) + eq(len(lines), 43) + fp = openfile('msg_19.txt') + try: + neq(EMPTYSTRING.join(lines), fp.read()) + finally: + fp.close() + + def test_typed_subpart_iterator(self): + eq = self.assertEqual + msg = self._msgobj('msg_04.txt') + it = Iterators.typed_subpart_iterator(msg, 'text') + lines = [] + subparts = 0 + for subpart in it: + subparts += 1 + lines.append(subpart.get_payload()) + eq(subparts, 2) + eq(EMPTYSTRING.join(lines), """\ +a simple kind of mirror +to reflect upon our own +a simple kind of mirror +to reflect upon our own +""") + + def test_typed_subpart_iterator_default_type(self): + eq = self.assertEqual + msg = self._msgobj('msg_03.txt') + it = Iterators.typed_subpart_iterator(msg, 'text', 'plain') + lines = [] + subparts = 0 + for subpart in it: + subparts += 1 + lines.append(subpart.get_payload()) + eq(subparts, 1) + eq(EMPTYSTRING.join(lines), """\ + +Hi, + +Do you like this message? + +-Me +""") + + def test_pushCR_LF(self): + '''FeedParser BufferedSubFile.push() assumed it received complete + line endings. A CR ending one push() followed by a LF starting + the next push() added an empty line. + ''' + imt = [ + ("a\r \n", 2), + ("b", 0), + ("c\n", 1), + ("", 0), + ("d\r\n", 1), + ("e\r", 0), + ("\nf", 1), + ("\r\n", 1), + ] + from email.feedparser import BufferedSubFile, NeedMoreData + bsf = BufferedSubFile() + om = [] + nt = 0 + for il, n in imt: + bsf.push(il) + nt += n + n1 = 0 + for ol in iter(bsf.readline, NeedMoreData): + om.append(ol) + n1 += 1 + self.assertEqual(n, n1) + self.assertEqual(len(om), nt) + self.assertEqual(''.join([il for il, n in imt]), ''.join(om)) + + def test_push_random(self): + from email.feedparser import BufferedSubFile, NeedMoreData + + n = 10000 + chunksize = 5 + chars = 'abcd \t\r\n' + + s = ''.join(choice(chars) for i in range(n)) + '\n' + target = s.splitlines(True) + + bsf = BufferedSubFile() + lines = [] + for i in range(0, len(s), chunksize): + chunk = s[i:i+chunksize] + bsf.push(chunk) + lines.extend(iter(bsf.readline, NeedMoreData)) + self.assertEqual(lines, target) + + +class TestFeedParsers(TestEmailBase): + + def parse(self, chunks): + from email.feedparser import FeedParser + feedparser = FeedParser() + for chunk in chunks: + feedparser.feed(chunk) + return feedparser.close() + + def test_newlines(self): + m = self.parse(['a:\nb:\rc:\r\nd:\n']) + self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) + m = self.parse(['a:\nb:\rc:\r\nd:']) + self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) + m = self.parse(['a:\rb', 'c:\n']) + self.assertEqual(m.keys(), ['a', 'bc']) + m = self.parse(['a:\r', 'b:\n']) + self.assertEqual(m.keys(), ['a', 'b']) + m = self.parse(['a:\r', '\nb:\n']) + self.assertEqual(m.keys(), ['a', 'b']) + + def test_long_lines(self): + # Expected peak memory use on 32-bit platform: 4*N*M bytes. + M, N = 1000, 20000 + m = self.parse(['a:b\n\n'] + ['x'*M] * N) + self.assertEqual(m.items(), [('a', 'b')]) + self.assertEqual(m.get_payload(), 'x'*M*N) + m = self.parse(['a:b\r\r'] + ['x'*M] * N) + self.assertEqual(m.items(), [('a', 'b')]) + self.assertEqual(m.get_payload(), 'x'*M*N) + m = self.parse(['a:\r', 'b: '] + ['x'*M] * N) + self.assertEqual(m.items(), [('a', ''), ('b', 'x'*M*N)]) + + +class TestParsers(TestEmailBase): + def test_header_parser(self): + eq = self.assertEqual + # Parse only the headers of a complex multipart MIME document + fp = openfile('msg_02.txt') + try: + msg = HeaderParser().parse(fp) + finally: + fp.close() + eq(msg['from'], 'ppp-request@zzz.org') + eq(msg['to'], 'ppp@zzz.org') + eq(msg.get_content_type(), 'multipart/mixed') + self.assertFalse(msg.is_multipart()) + self.assertIsInstance(msg.get_payload(), str) + + def test_whitespace_continuation(self): + eq = self.assertEqual + # This message contains a line after the Subject: header that has only + # whitespace, but it is not empty! + msg = email.message_from_string("""\ +From: aperson@dom.ain +To: bperson@dom.ain +Subject: the next line has a space on it +\x20 +Date: Mon, 8 Apr 2002 15:09:19 -0400 +Message-ID: spam + +Here's the message body +""") + eq(msg['subject'], 'the next line has a space on it\n ') + eq(msg['message-id'], 'spam') + eq(msg.get_payload(), "Here's the message body\n") + + def test_whitespace_continuation_last_header(self): + eq = self.assertEqual + # Like the previous test, but the subject line is the last + # header. + msg = email.message_from_string("""\ +From: aperson@dom.ain +To: bperson@dom.ain +Date: Mon, 8 Apr 2002 15:09:19 -0400 +Message-ID: spam +Subject: the next line has a space on it +\x20 + +Here's the message body +""") + eq(msg['subject'], 'the next line has a space on it\n ') + eq(msg['message-id'], 'spam') + eq(msg.get_payload(), "Here's the message body\n") + + def test_crlf_separation(self): + eq = self.assertEqual + fp = openfile('msg_26.txt', mode='rb') + try: + msg = Parser().parse(fp) + finally: + fp.close() + eq(len(msg.get_payload()), 2) + part1 = msg.get_payload(0) + eq(part1.get_content_type(), 'text/plain') + eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n') + part2 = msg.get_payload(1) + eq(part2.get_content_type(), 'application/riscos') + + def test_multipart_digest_with_extra_mime_headers(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + fp = openfile('msg_28.txt') + try: + msg = email.message_from_file(fp) + finally: + fp.close() + # Structure is: + # multipart/digest + # message/rfc822 + # text/plain + # message/rfc822 + # text/plain + eq(msg.is_multipart(), 1) + eq(len(msg.get_payload()), 2) + part1 = msg.get_payload(0) + eq(part1.get_content_type(), 'message/rfc822') + eq(part1.is_multipart(), 1) + eq(len(part1.get_payload()), 1) + part1a = part1.get_payload(0) + eq(part1a.is_multipart(), 0) + eq(part1a.get_content_type(), 'text/plain') + neq(part1a.get_payload(), 'message 1\n') + # next message/rfc822 + part2 = msg.get_payload(1) + eq(part2.get_content_type(), 'message/rfc822') + eq(part2.is_multipart(), 1) + eq(len(part2.get_payload()), 1) + part2a = part2.get_payload(0) + eq(part2a.is_multipart(), 0) + eq(part2a.get_content_type(), 'text/plain') + neq(part2a.get_payload(), 'message 2\n') + + def test_three_lines(self): + # A bug report by Andrew McNamara + lines = ['From: Andrew Person From', 'From']) + eq(msg.get_payload(), 'body') + + def test_rfc2822_space_not_allowed_in_header(self): + eq = self.assertEqual + m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody' + msg = email.message_from_string(m) + eq(len(msg.keys()), 0) + + def test_rfc2822_one_character_header(self): + eq = self.assertEqual + m = 'A: first header\nB: second header\nCC: third header\n\nbody' + msg = email.message_from_string(m) + headers = msg.keys() + headers.sort() + eq(headers, ['A', 'B', 'CC']) + eq(msg.get_payload(), 'body') + + def test_CRLFLF_at_end_of_part(self): + # issue 5610: feedparser should not eat two chars from body part ending + # with "\r\n\n". + m = ( + "From: foo@bar.com\n" + "To: baz\n" + "Mime-Version: 1.0\n" + "Content-Type: multipart/mixed; boundary=BOUNDARY\n" + "\n" + "--BOUNDARY\n" + "Content-Type: text/plain\n" + "\n" + "body ending with CRLF newline\r\n" + "\n" + "--BOUNDARY--\n" + ) + msg = email.message_from_string(m) + self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n')) + + +class TestBase64(unittest.TestCase): + def test_len(self): + eq = self.assertEqual + eq(base64MIME.base64_len('hello'), + len(base64MIME.encode('hello', eol=''))) + for size in range(15): + if size == 0 : bsize = 0 + elif size <= 3 : bsize = 4 + elif size <= 6 : bsize = 8 + elif size <= 9 : bsize = 12 + elif size <= 12: bsize = 16 + else : bsize = 20 + eq(base64MIME.base64_len('x'*size), bsize) + + def test_decode(self): + eq = self.assertEqual + eq(base64MIME.decode(''), '') + eq(base64MIME.decode('aGVsbG8='), 'hello') + eq(base64MIME.decode('aGVsbG8=', 'X'), 'hello') + eq(base64MIME.decode('aGVsbG8NCndvcmxk\n', 'X'), 'helloXworld') + + def test_encode(self): + eq = self.assertEqual + eq(base64MIME.encode(''), '') + eq(base64MIME.encode('hello'), 'aGVsbG8=\n') + # Test the binary flag + eq(base64MIME.encode('hello\n'), 'aGVsbG8K\n') + eq(base64MIME.encode('hello\n', 0), 'aGVsbG8NCg==\n') + # Test the maxlinelen arg + eq(base64MIME.encode('xxxx ' * 20, maxlinelen=40), """\ +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg +eHh4eCB4eHh4IA== +""") + # Test the eol argument + eq(base64MIME.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r +eHh4eCB4eHh4IA==\r +""") + + def test_header_encode(self): + eq = self.assertEqual + he = base64MIME.header_encode + eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=') + eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=') + # Test the charset option + eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=') + # Test the keep_eols flag + eq(he('hello\nworld', keep_eols=True), + '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') + # Test the maxlinelen argument + eq(he('xxxx ' * 20, maxlinelen=40), """\ +=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?= + =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?= + =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?= + =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?= + =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?= + =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""") + # Test the eol argument + eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=\r + =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=\r + =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=\r + =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=\r + =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=\r + =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""") + + + +class TestQuopri(unittest.TestCase): + def setUp(self): + self.hlit = [chr(x) for x in range(ord('a'), ord('z')+1)] + \ + [chr(x) for x in range(ord('A'), ord('Z')+1)] + \ + [chr(x) for x in range(ord('0'), ord('9')+1)] + \ + ['!', '*', '+', '-', '/', ' '] + self.hnon = [chr(x) for x in range(256) if chr(x) not in self.hlit] + assert len(self.hlit) + len(self.hnon) == 256 + self.blit = [chr(x) for x in range(ord(' '), ord('~')+1)] + ['\t'] + self.blit.remove('=') + self.bnon = [chr(x) for x in range(256) if chr(x) not in self.blit] + assert len(self.blit) + len(self.bnon) == 256 + + def test_header_quopri_check(self): + for c in self.hlit: + self.assertFalse(quopriMIME.header_quopri_check(c)) + for c in self.hnon: + self.assertTrue(quopriMIME.header_quopri_check(c)) + + def test_body_quopri_check(self): + for c in self.blit: + self.assertFalse(quopriMIME.body_quopri_check(c)) + for c in self.bnon: + self.assertTrue(quopriMIME.body_quopri_check(c)) + + def test_header_quopri_len(self): + eq = self.assertEqual + hql = quopriMIME.header_quopri_len + enc = quopriMIME.header_encode + for s in ('hello', 'h@e@l@l@o@'): + # Empty charset and no line-endings. 7 == RFC chrome + eq(hql(s), len(enc(s, charset='', eol=''))-7) + for c in self.hlit: + eq(hql(c), 1) + for c in self.hnon: + eq(hql(c), 3) + + def test_body_quopri_len(self): + eq = self.assertEqual + bql = quopriMIME.body_quopri_len + for c in self.blit: + eq(bql(c), 1) + for c in self.bnon: + eq(bql(c), 3) + + def test_quote_unquote_idempotent(self): + for x in range(256): + c = chr(x) + self.assertEqual(quopriMIME.unquote(quopriMIME.quote(c)), c) + + def test_header_encode(self): + eq = self.assertEqual + he = quopriMIME.header_encode + eq(he('hello'), '=?iso-8859-1?q?hello?=') + eq(he('hello\nworld'), '=?iso-8859-1?q?hello=0D=0Aworld?=') + # Test the charset option + eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=') + # Test the keep_eols flag + eq(he('hello\nworld', keep_eols=True), '=?iso-8859-1?q?hello=0Aworld?=') + # Test a non-ASCII character + eq(he('hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=') + # Test the maxlinelen argument + eq(he('xxxx ' * 20, maxlinelen=40), """\ +=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?= + =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?= + =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?= + =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?= + =?iso-8859-1?q?x_xxxx_xxxx_?=""") + # Test the eol argument + eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=\r + =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=\r + =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=\r + =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=\r + =?iso-8859-1?q?x_xxxx_xxxx_?=""") + + def test_decode(self): + eq = self.assertEqual + eq(quopriMIME.decode(''), '') + eq(quopriMIME.decode('hello'), 'hello') + eq(quopriMIME.decode('hello', 'X'), 'hello') + eq(quopriMIME.decode('hello\nworld', 'X'), 'helloXworld') + + def test_encode(self): + eq = self.assertEqual + eq(quopriMIME.encode(''), '') + eq(quopriMIME.encode('hello'), 'hello') + # Test the binary flag + eq(quopriMIME.encode('hello\r\nworld'), 'hello\nworld') + eq(quopriMIME.encode('hello\r\nworld', 0), 'hello\nworld') + # Test the maxlinelen arg + eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40), """\ +xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx= + xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx= +x xxxx xxxx xxxx xxxx=20""") + # Test the eol argument + eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r + xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r +x xxxx xxxx xxxx xxxx=20""") + eq(quopriMIME.encode("""\ +one line + +two line"""), """\ +one line + +two line""") + + + +# Test the Charset class +class TestCharset(unittest.TestCase): + def tearDown(self): + from email import Charset as CharsetModule + try: + del CharsetModule.CHARSETS['fake'] + except KeyError: + pass + + def test_idempotent(self): + eq = self.assertEqual + # Make sure us-ascii = no Unicode conversion + c = Charset('us-ascii') + s = 'Hello World!' + sp = c.to_splittable(s) + eq(s, c.from_splittable(sp)) + # test 8-bit idempotency with us-ascii + s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa' + sp = c.to_splittable(s) + eq(s, c.from_splittable(sp)) + + def test_body_encode(self): + eq = self.assertEqual + # Try a charset with QP body encoding + c = Charset('iso-8859-1') + eq('hello w=F6rld', c.body_encode('hello w\xf6rld')) + # Try a charset with Base64 body encoding + c = Charset('utf-8') + eq('aGVsbG8gd29ybGQ=\n', c.body_encode('hello world')) + # Try a charset with None body encoding + c = Charset('us-ascii') + eq('hello world', c.body_encode('hello world')) + # Try the convert argument, where input codec != output codec + c = Charset('euc-jp') + # With apologies to Tokio Kikuchi ;) + try: + eq('\x1b$B5FCO;~IW\x1b(B', + c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7')) + eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', + c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False)) + except LookupError: + # We probably don't have the Japanese codecs installed + pass + # Testing SF bug #625509, which we have to fake, since there are no + # built-in encodings where the header encoding is QP but the body + # encoding is not. + from email import Charset as CharsetModule + CharsetModule.add_charset('fake', CharsetModule.QP, None) + c = Charset('fake') + eq('hello w\xf6rld', c.body_encode('hello w\xf6rld')) + + def test_unicode_charset_name(self): + charset = Charset(u'us-ascii') + self.assertEqual(str(charset), 'us-ascii') + self.assertRaises(Errors.CharsetError, Charset, 'asc\xffii') + + def test_codecs_aliases_accepted(self): + charset = Charset('utf8') + self.assertEqual(str(charset), 'utf-8') + + +# Test multilingual MIME headers. +class TestHeader(TestEmailBase): + def test_simple(self): + eq = self.ndiffAssertEqual + h = Header('Hello World!') + eq(h.encode(), 'Hello World!') + h.append(' Goodbye World!') + eq(h.encode(), 'Hello World! Goodbye World!') + + def test_simple_surprise(self): + eq = self.ndiffAssertEqual + h = Header('Hello World!') + eq(h.encode(), 'Hello World!') + h.append('Goodbye World!') + eq(h.encode(), 'Hello World! Goodbye World!') + + def test_header_needs_no_decoding(self): + h = 'no decoding needed' + self.assertEqual(decode_header(h), [(h, None)]) + + def test_long(self): + h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.", + maxlinelen=76) + for l in h.encode(splitchars=' ').split('\n '): + self.assertLessEqual(len(l), 76) + + def test_multilingual(self): + eq = self.ndiffAssertEqual + g = Charset("iso-8859-1") + cz = Charset("iso-8859-2") + utf8 = Charset("utf-8") + g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. " + cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. " + utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8") + h = Header(g_head, g) + h.append(cz_head, cz) + h.append(utf8_head, utf8) + enc = h.encode() + eq(enc, """\ +=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_ko?= + =?iso-8859-1?q?mfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wan?= + =?iso-8859-1?q?dgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6?= + =?iso-8859-1?q?rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?= + =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= + =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?= + =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?= + =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?= + =?utf-8?q?_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das_Oder_die_Fl?= + =?utf-8?b?aXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBo+OBpuOBhOOBvuOBmQ==?= + =?utf-8?b?44CC?=""") + eq(decode_header(enc), + [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"), + (utf8_head, "utf-8")]) + ustr = unicode(h) + eq(ustr.encode('utf-8'), + 'Die Mieter treten hier ein werden mit einem Foerderband ' + 'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen ' + 'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen ' + 'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod ' + 'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81' + '\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3' + '\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3' + '\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' + '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e' + '\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3' + '\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82' + '\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b' + '\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git ' + 'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt ' + 'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81' + '\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82') + # Test make_header() + newh = make_header(decode_header(enc)) + eq(newh, enc) + + def test_header_ctor_default_args(self): + eq = self.ndiffAssertEqual + h = Header() + eq(h, '') + h.append('foo', Charset('iso-8859-1')) + eq(h, '=?iso-8859-1?q?foo?=') + + def test_explicit_maxlinelen(self): + eq = self.ndiffAssertEqual + hstr = 'A very long line that must get split to something other than at the 76th character boundary to test the non-default behavior' + h = Header(hstr) + eq(h.encode(), '''\ +A very long line that must get split to something other than at the 76th + character boundary to test the non-default behavior''') + h = Header(hstr, header_name='Subject') + eq(h.encode(), '''\ +A very long line that must get split to something other than at the + 76th character boundary to test the non-default behavior''') + h = Header(hstr, maxlinelen=1024, header_name='Subject') + eq(h.encode(), hstr) + + def test_us_ascii_header(self): + eq = self.assertEqual + s = 'hello' + x = decode_header(s) + eq(x, [('hello', None)]) + h = make_header(x) + eq(s, h.encode()) + + def test_string_charset(self): + eq = self.assertEqual + h = Header() + h.append('hello', 'iso-8859-1') + eq(h, '=?iso-8859-1?q?hello?=') + +## def test_unicode_error(self): +## raises = self.assertRaises +## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii') +## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii') +## h = Header() +## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii') +## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii') +## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1') + + def test_utf8_shortest(self): + eq = self.assertEqual + h = Header(u'p\xf6stal', 'utf-8') + eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=') + h = Header(u'\u83ca\u5730\u6642\u592b', 'utf-8') + eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=') + + def test_bad_8bit_header(self): + raises = self.assertRaises + eq = self.assertEqual + x = 'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' + raises(UnicodeError, Header, x) + h = Header() + raises(UnicodeError, h.append, x) + eq(str(Header(x, errors='replace')), x) + h.append(x, errors='replace') + eq(str(h), x) + + def test_encoded_adjacent_nonencoded(self): + eq = self.assertEqual + h = Header() + h.append('hello', 'iso-8859-1') + h.append('world') + s = h.encode() + eq(s, '=?iso-8859-1?q?hello?= world') + h = make_header(decode_header(s)) + eq(h.encode(), s) + + def test_whitespace_eater(self): + eq = self.assertEqual + s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' + parts = decode_header(s) + eq(parts, [('Subject:', None), ('\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), ('zz.', None)]) + hdr = make_header(parts) + eq(hdr.encode(), + 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') + + def test_broken_base64_header(self): + raises = self.assertRaises + s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?=' + raises(Errors.HeaderParseError, decode_header, s) + + # Issue 1078919 + def test_ascii_add_header(self): + msg = Message() + msg.add_header('Content-Disposition', 'attachment', + filename='bud.gif') + self.assertEqual('attachment; filename="bud.gif"', + msg['Content-Disposition']) + + def test_nonascii_add_header_via_triple(self): + msg = Message() + msg.add_header('Content-Disposition', 'attachment', + filename=('iso-8859-1', '', 'Fu\xdfballer.ppt')) + self.assertEqual( + 'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"', + msg['Content-Disposition']) + + def test_encode_unaliased_charset(self): + # Issue 1379416: when the charset has no output conversion, + # output was accidentally getting coerced to unicode. + res = Header('abc','iso-8859-2').encode() + self.assertEqual(res, '=?iso-8859-2?q?abc?=') + self.assertIsInstance(res, str) + +# Test RFC 2231 header parameters (en/de)coding +class TestRFC2231(TestEmailBase): + def test_get_param(self): + eq = self.assertEqual + msg = self._msgobj('msg_29.txt') + eq(msg.get_param('title'), + ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) + eq(msg.get_param('title', unquote=False), + ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"')) + + def test_set_param(self): + eq = self.assertEqual + msg = Message() + msg.set_param('title', 'This is even more ***fun*** isn\'t it!', + charset='us-ascii') + eq(msg.get_param('title'), + ('us-ascii', '', 'This is even more ***fun*** isn\'t it!')) + msg.set_param('title', 'This is even more ***fun*** isn\'t it!', + charset='us-ascii', language='en') + eq(msg.get_param('title'), + ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) + msg = self._msgobj('msg_01.txt') + msg.set_param('title', 'This is even more ***fun*** isn\'t it!', + charset='us-ascii', language='en') + self.ndiffAssertEqual(msg.as_string(), """\ +Return-Path: +Delivered-To: bbb@zzz.org +Received: by mail.zzz.org (Postfix, from userid 889) + id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Message-ID: <15090.61304.110929.45684@aaa.zzz.org> +From: bbb@ddd.com (John X. Doe) +To: bbb@zzz.org +Subject: This is a test message +Date: Fri, 4 May 2001 14:05:44 -0400 +Content-Type: text/plain; charset=us-ascii; + title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21" + + +Hi, + +Do you like this message? + +-Me +""") + + def test_del_param(self): + eq = self.ndiffAssertEqual + msg = self._msgobj('msg_01.txt') + msg.set_param('foo', 'bar', charset='us-ascii', language='en') + msg.set_param('title', 'This is even more ***fun*** isn\'t it!', + charset='us-ascii', language='en') + msg.del_param('foo', header='Content-Type') + eq(msg.as_string(), """\ +Return-Path: +Delivered-To: bbb@zzz.org +Received: by mail.zzz.org (Postfix, from userid 889) + id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Message-ID: <15090.61304.110929.45684@aaa.zzz.org> +From: bbb@ddd.com (John X. Doe) +To: bbb@zzz.org +Subject: This is a test message +Date: Fri, 4 May 2001 14:05:44 -0400 +Content-Type: text/plain; charset="us-ascii"; + title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21" + + +Hi, + +Do you like this message? + +-Me +""") + + def test_rfc2231_get_content_charset(self): + eq = self.assertEqual + msg = self._msgobj('msg_32.txt') + eq(msg.get_content_charset(), 'us-ascii') + + def test_rfc2231_no_language_or_charset(self): + m = '''\ +Content-Transfer-Encoding: 8bit +Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm" +Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm + +''' + msg = email.message_from_string(m) + param = msg.get_param('NAME') + self.assertNotIsInstance(param, tuple) + self.assertEqual( + param, + 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm') + + def test_rfc2231_no_language_or_charset_in_filename(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="''This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), + 'This is even more ***fun*** is it not.pdf') + + def test_rfc2231_no_language_or_charset_in_filename_encoded(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="''This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), + 'This is even more ***fun*** is it not.pdf') + + def test_rfc2231_partly_encoded(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0="''This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual( + msg.get_filename(), + 'This%20is%20even%20more%20***fun*** is it not.pdf') + + def test_rfc2231_partly_nonencoded(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0="This%20is%20even%20more%20"; +\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual( + msg.get_filename(), + 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf') + + def test_rfc2231_no_language_or_charset_in_boundary(self): + m = '''\ +Content-Type: multipart/alternative; +\tboundary*0*="''This%20is%20even%20more%20"; +\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tboundary*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_boundary(), + 'This is even more ***fun*** is it not.pdf') + + def test_rfc2231_no_language_or_charset_in_charset(self): + # This is a nonsensical charset value, but tests the code anyway + m = '''\ +Content-Type: text/plain; +\tcharset*0*="This%20is%20even%20more%20"; +\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tcharset*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_content_charset(), + 'this is even more ***fun*** is it not.pdf') + + def test_rfc2231_bad_encoding_in_filename(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="bogus'xx'This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), + 'This is even more ***fun*** is it not.pdf') + + def test_rfc2231_bad_encoding_in_charset(self): + m = """\ +Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D + +""" + msg = email.message_from_string(m) + # This should return None because non-ascii characters in the charset + # are not allowed. + self.assertEqual(msg.get_content_charset(), None) + + def test_rfc2231_bad_character_in_charset(self): + m = """\ +Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D + +""" + msg = email.message_from_string(m) + # This should return None because non-ascii characters in the charset + # are not allowed. + self.assertEqual(msg.get_content_charset(), None) + + def test_rfc2231_bad_character_in_filename(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="ascii'xx'This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2*="is it not.pdf%E2" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), + u'This is even more ***fun*** is it not.pdf\ufffd') + + def test_rfc2231_unknown_encoding(self): + m = """\ +Content-Transfer-Encoding: 8bit +Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt + +""" + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), 'myfile.txt') + + def test_rfc2231_single_tick_in_filename_extended(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; +\tname*0*=\"Frank's\"; name*1*=\" Document\" + +""" + msg = email.message_from_string(m) + charset, language, s = msg.get_param('name') + eq(charset, None) + eq(language, None) + eq(s, "Frank's Document") + + def test_rfc2231_single_tick_in_filename(self): + m = """\ +Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" + +""" + msg = email.message_from_string(m) + param = msg.get_param('name') + self.assertNotIsInstance(param, tuple) + self.assertEqual(param, "Frank's Document") + + def test_rfc2231_tick_attack_extended(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; +\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\" + +""" + msg = email.message_from_string(m) + charset, language, s = msg.get_param('name') + eq(charset, 'us-ascii') + eq(language, 'en-us') + eq(s, "Frank's Document") + + def test_rfc2231_tick_attack(self): + m = """\ +Content-Type: application/x-foo; +\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" + +""" + msg = email.message_from_string(m) + param = msg.get_param('name') + self.assertNotIsInstance(param, tuple) + self.assertEqual(param, "us-ascii'en-us'Frank's Document") + + def test_rfc2231_no_extended_values(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; name=\"Frank's Document\" + +""" + msg = email.message_from_string(m) + eq(msg.get_param('name'), "Frank's Document") + + def test_rfc2231_encoded_then_unencoded_segments(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; +\tname*0*=\"us-ascii'en-us'My\"; +\tname*1=\" Document\"; +\tname*2*=\" For You\" + +""" + msg = email.message_from_string(m) + charset, language, s = msg.get_param('name') + eq(charset, 'us-ascii') + eq(language, 'en-us') + eq(s, 'My Document For You') + + def test_rfc2231_unencoded_then_encoded_segments(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; +\tname*0=\"us-ascii'en-us'My\"; +\tname*1*=\" Document\"; +\tname*2*=\" For You\" + +""" + msg = email.message_from_string(m) + charset, language, s = msg.get_param('name') + eq(charset, 'us-ascii') + eq(language, 'en-us') + eq(s, 'My Document For You') + + + +# Tests to ensure that signed parts of an email are completely preserved, as +# required by RFC1847 section 2.1. Note that these are incomplete, because the +# email package does not currently always preserve the body. See issue 1670765. +class TestSigned(TestEmailBase): + + def _msg_and_obj(self, filename): + fp = openfile(findfile(filename)) + try: + original = fp.read() + msg = email.message_from_string(original) + finally: + fp.close() + return original, msg + + def _signed_parts_eq(self, original, result): + # Extract the first mime part of each message + import re + repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M) + inpart = repart.search(original).group(2) + outpart = repart.search(result).group(2) + self.assertEqual(outpart, inpart) + + def test_long_headers_as_string(self): + original, msg = self._msg_and_obj('msg_45.txt') + result = msg.as_string() + self._signed_parts_eq(original, result) + + def test_long_headers_flatten(self): + original, msg = self._msg_and_obj('msg_45.txt') + fp = StringIO() + Generator(fp).flatten(msg) + result = fp.getvalue() + self._signed_parts_eq(original, result) + + + +def _testclasses(): + mod = sys.modules[__name__] + return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')] + + +def suite(): + suite = unittest.TestSuite() + for testclass in _testclasses(): + suite.addTest(unittest.makeSuite(testclass)) + return suite + + +def test_main(): + for testclass in _testclasses(): + run_unittest(testclass) + + + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/third_party/stdlib/email/test/test_email_codecs.py b/third_party/stdlib/email/test/test_email_codecs.py new file mode 100644 index 00000000..532750a6 --- /dev/null +++ b/third_party/stdlib/email/test/test_email_codecs.py @@ -0,0 +1,77 @@ +# Copyright (C) 2002-2006 Python Software Foundation +# Contact: email-sig@python.org +# email package unit tests for (optional) Asian codecs + +import unittest +from test.test_support import run_unittest + +from email.test.test_email import TestEmailBase +from email.charset import Charset +from email.header import Header, decode_header +from email.message import Message + +# We're compatible with Python 2.3, but it doesn't have the built-in Asian +# codecs, so we have to skip all these tests. +try: + unicode('foo', 'euc-jp') +except LookupError: + raise unittest.SkipTest + + + +class TestEmailAsianCodecs(TestEmailBase): + def test_japanese_codecs(self): + eq = self.ndiffAssertEqual + j = Charset("euc-jp") + g = Charset("iso-8859-1") + h = Header("Hello World!") + jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa' + ghello = 'Gr\xfc\xdf Gott!' + h.append(jhello, j) + h.append(ghello, g) + # BAW: This used to -- and maybe should -- fold the two iso-8859-1 + # chunks into a single encoded word. However it doesn't violate the + # standard to have them as two encoded chunks and maybe it's + # reasonable for each .append() call to result in a separate + # encoded word. + eq(h.encode(), """\ +Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?= + =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""") + eq(decode_header(h.encode()), + [('Hello World!', None), + ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'), + ('Gr\xfc\xdf Gott!', 'iso-8859-1')]) + long = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9' + h = Header(long, j, header_name="Subject") + # test a very long header + enc = h.encode() + # TK: splitting point may differ by codec design and/or Header encoding + eq(enc , """\ +=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?= + =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""") + # TK: full decode comparison + eq(h.__unicode__().encode('euc-jp'), long) + + def test_payload_encoding(self): + jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa' + jcode = 'euc-jp' + msg = Message() + msg.set_payload(jhello, jcode) + ustr = unicode(msg.get_payload(), msg.get_content_charset()) + self.assertEqual(jhello, ustr.encode(jcode)) + + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(TestEmailAsianCodecs)) + return suite + + +def test_main(): + run_unittest(TestEmailAsianCodecs) + + + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/third_party/stdlib/email/test/test_email_codecs_renamed.py b/third_party/stdlib/email/test/test_email_codecs_renamed.py new file mode 100644 index 00000000..532750a6 --- /dev/null +++ b/third_party/stdlib/email/test/test_email_codecs_renamed.py @@ -0,0 +1,77 @@ +# Copyright (C) 2002-2006 Python Software Foundation +# Contact: email-sig@python.org +# email package unit tests for (optional) Asian codecs + +import unittest +from test.test_support import run_unittest + +from email.test.test_email import TestEmailBase +from email.charset import Charset +from email.header import Header, decode_header +from email.message import Message + +# We're compatible with Python 2.3, but it doesn't have the built-in Asian +# codecs, so we have to skip all these tests. +try: + unicode('foo', 'euc-jp') +except LookupError: + raise unittest.SkipTest + + + +class TestEmailAsianCodecs(TestEmailBase): + def test_japanese_codecs(self): + eq = self.ndiffAssertEqual + j = Charset("euc-jp") + g = Charset("iso-8859-1") + h = Header("Hello World!") + jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa' + ghello = 'Gr\xfc\xdf Gott!' + h.append(jhello, j) + h.append(ghello, g) + # BAW: This used to -- and maybe should -- fold the two iso-8859-1 + # chunks into a single encoded word. However it doesn't violate the + # standard to have them as two encoded chunks and maybe it's + # reasonable for each .append() call to result in a separate + # encoded word. + eq(h.encode(), """\ +Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?= + =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""") + eq(decode_header(h.encode()), + [('Hello World!', None), + ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'), + ('Gr\xfc\xdf Gott!', 'iso-8859-1')]) + long = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9' + h = Header(long, j, header_name="Subject") + # test a very long header + enc = h.encode() + # TK: splitting point may differ by codec design and/or Header encoding + eq(enc , """\ +=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?= + =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""") + # TK: full decode comparison + eq(h.__unicode__().encode('euc-jp'), long) + + def test_payload_encoding(self): + jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa' + jcode = 'euc-jp' + msg = Message() + msg.set_payload(jhello, jcode) + ustr = unicode(msg.get_payload(), msg.get_content_charset()) + self.assertEqual(jhello, ustr.encode(jcode)) + + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(TestEmailAsianCodecs)) + return suite + + +def test_main(): + run_unittest(TestEmailAsianCodecs) + + + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/third_party/stdlib/email/test/test_email_renamed.py b/third_party/stdlib/email/test/test_email_renamed.py new file mode 100644 index 00000000..5a417012 --- /dev/null +++ b/third_party/stdlib/email/test/test_email_renamed.py @@ -0,0 +1,3324 @@ +# Copyright (C) 2001-2007 Python Software Foundation +# Contact: email-sig@python.org +# email package unit tests + +import os +import sys +import time +import base64 +import difflib +import unittest +import warnings +from cStringIO import StringIO + +import email + +from email.charset import Charset +from email.header import Header, decode_header, make_header +from email.parser import Parser, HeaderParser +from email.generator import Generator, DecodedGenerator +from email.message import Message +from email.mime.application import MIMEApplication +from email.mime.audio import MIMEAudio +from email.mime.text import MIMEText +from email.mime.image import MIMEImage +from email.mime.base import MIMEBase +from email.mime.message import MIMEMessage +from email.mime.multipart import MIMEMultipart +from email import utils +from email import errors +from email import encoders +from email import iterators +from email import base64mime +from email import quoprimime + +from test.test_support import findfile, run_unittest +from email.test import __file__ as landmark + + +NL = '\n' +EMPTYSTRING = '' +SPACE = ' ' + + + +def openfile(filename, mode='r'): + path = os.path.join(os.path.dirname(landmark), 'data', filename) + return open(path, mode) + + + +# Base test class +class TestEmailBase(unittest.TestCase): + def ndiffAssertEqual(self, first, second): + """Like assertEqual except use ndiff for readable output.""" + if first != second: + sfirst = str(first) + ssecond = str(second) + diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines()) + fp = StringIO() + print >> fp, NL, NL.join(diff) + raise self.failureException, fp.getvalue() + + def _msgobj(self, filename): + fp = openfile(findfile(filename)) + try: + msg = email.message_from_file(fp) + finally: + fp.close() + return msg + + + +# Test various aspects of the Message class's API +class TestMessageAPI(TestEmailBase): + def test_get_all(self): + eq = self.assertEqual + msg = self._msgobj('msg_20.txt') + eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org']) + eq(msg.get_all('xx', 'n/a'), 'n/a') + + def test_getset_charset(self): + eq = self.assertEqual + msg = Message() + eq(msg.get_charset(), None) + charset = Charset('iso-8859-1') + msg.set_charset(charset) + eq(msg['mime-version'], '1.0') + eq(msg.get_content_type(), 'text/plain') + eq(msg['content-type'], 'text/plain; charset="iso-8859-1"') + eq(msg.get_param('charset'), 'iso-8859-1') + eq(msg['content-transfer-encoding'], 'quoted-printable') + eq(msg.get_charset().input_charset, 'iso-8859-1') + # Remove the charset + msg.set_charset(None) + eq(msg.get_charset(), None) + eq(msg['content-type'], 'text/plain') + # Try adding a charset when there's already MIME headers present + msg = Message() + msg['MIME-Version'] = '2.0' + msg['Content-Type'] = 'text/x-weird' + msg['Content-Transfer-Encoding'] = 'quinted-puntable' + msg.set_charset(charset) + eq(msg['mime-version'], '2.0') + eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"') + eq(msg['content-transfer-encoding'], 'quinted-puntable') + + def test_set_charset_from_string(self): + eq = self.assertEqual + msg = Message() + msg.set_charset('us-ascii') + eq(msg.get_charset().input_charset, 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + + def test_set_payload_with_charset(self): + msg = Message() + charset = Charset('iso-8859-1') + msg.set_payload('This is a string payload', charset) + self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1') + + def test_get_charsets(self): + eq = self.assertEqual + + msg = self._msgobj('msg_08.txt') + charsets = msg.get_charsets() + eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r']) + + msg = self._msgobj('msg_09.txt') + charsets = msg.get_charsets('dingbat') + eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat', + 'koi8-r']) + + msg = self._msgobj('msg_12.txt') + charsets = msg.get_charsets() + eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2', + 'iso-8859-3', 'us-ascii', 'koi8-r']) + + def test_get_filename(self): + eq = self.assertEqual + + msg = self._msgobj('msg_04.txt') + filenames = [p.get_filename() for p in msg.get_payload()] + eq(filenames, ['msg.txt', 'msg.txt']) + + msg = self._msgobj('msg_07.txt') + subpart = msg.get_payload(1) + eq(subpart.get_filename(), 'dingusfish.gif') + + def test_get_filename_with_name_parameter(self): + eq = self.assertEqual + + msg = self._msgobj('msg_44.txt') + filenames = [p.get_filename() for p in msg.get_payload()] + eq(filenames, ['msg.txt', 'msg.txt']) + + def test_get_boundary(self): + eq = self.assertEqual + msg = self._msgobj('msg_07.txt') + # No quotes! + eq(msg.get_boundary(), 'BOUNDARY') + + def test_set_boundary(self): + eq = self.assertEqual + # This one has no existing boundary parameter, but the Content-Type: + # header appears fifth. + msg = self._msgobj('msg_01.txt') + msg.set_boundary('BOUNDARY') + header, value = msg.items()[4] + eq(header.lower(), 'content-type') + eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"') + # This one has a Content-Type: header, with a boundary, stuck in the + # middle of its headers. Make sure the order is preserved; it should + # be fifth. + msg = self._msgobj('msg_04.txt') + msg.set_boundary('BOUNDARY') + header, value = msg.items()[4] + eq(header.lower(), 'content-type') + eq(value, 'multipart/mixed; boundary="BOUNDARY"') + # And this one has no Content-Type: header at all. + msg = self._msgobj('msg_03.txt') + self.assertRaises(errors.HeaderParseError, + msg.set_boundary, 'BOUNDARY') + + def test_get_decoded_payload(self): + eq = self.assertEqual + msg = self._msgobj('msg_10.txt') + # The outer message is a multipart + eq(msg.get_payload(decode=True), None) + # Subpart 1 is 7bit encoded + eq(msg.get_payload(0).get_payload(decode=True), + 'This is a 7bit encoded message.\n') + # Subpart 2 is quopri + eq(msg.get_payload(1).get_payload(decode=True), + '\xa1This is a Quoted Printable encoded message!\n') + # Subpart 3 is base64 + eq(msg.get_payload(2).get_payload(decode=True), + 'This is a Base64 encoded message.') + # Subpart 4 is base64 with a trailing newline, which + # used to be stripped (issue 7143). + eq(msg.get_payload(3).get_payload(decode=True), + 'This is a Base64 encoded message.\n') + # Subpart 5 has no Content-Transfer-Encoding: header. + eq(msg.get_payload(4).get_payload(decode=True), + 'This has no Content-Transfer-Encoding: header.\n') + + def test_get_decoded_uu_payload(self): + eq = self.assertEqual + msg = Message() + msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n') + for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): + msg['content-transfer-encoding'] = cte + eq(msg.get_payload(decode=True), 'hello world') + # Now try some bogus data + msg.set_payload('foo') + eq(msg.get_payload(decode=True), 'foo') + + def test_decoded_generator(self): + eq = self.assertEqual + msg = self._msgobj('msg_07.txt') + fp = openfile('msg_17.txt') + try: + text = fp.read() + finally: + fp.close() + s = StringIO() + g = DecodedGenerator(s) + g.flatten(msg) + eq(s.getvalue(), text) + + def test__contains__(self): + msg = Message() + msg['From'] = 'Me' + msg['to'] = 'You' + # Check for case insensitivity + self.assertIn('from', msg) + self.assertIn('From', msg) + self.assertIn('FROM', msg) + self.assertIn('to', msg) + self.assertIn('To', msg) + self.assertIn('TO', msg) + + def test_as_string(self): + eq = self.assertEqual + msg = self._msgobj('msg_01.txt') + fp = openfile('msg_01.txt') + try: + # BAW 30-Mar-2009 Evil be here. So, the generator is broken with + # respect to long line breaking. It's also not idempotent when a + # header from a parsed message is continued with tabs rather than + # spaces. Before we fixed bug 1974 it was reversedly broken, + # i.e. headers that were continued with spaces got continued with + # tabs. For Python 2.x there's really no good fix and in Python + # 3.x all this stuff is re-written to be right(er). Chris Withers + # convinced me that using space as the default continuation + # character is less bad for more applications. + text = fp.read().replace('\t', ' ') + finally: + fp.close() + self.ndiffAssertEqual(text, msg.as_string()) + fullrepr = str(msg) + lines = fullrepr.split('\n') + self.assertTrue(lines[0].startswith('From ')) + eq(text, NL.join(lines[1:])) + + def test_bad_param(self): + msg = email.message_from_string("Content-Type: blarg; baz; boo\n") + self.assertEqual(msg.get_param('baz'), '') + + def test_missing_filename(self): + msg = email.message_from_string("From: foo\n") + self.assertEqual(msg.get_filename(), None) + + def test_bogus_filename(self): + msg = email.message_from_string( + "Content-Disposition: blarg; filename\n") + self.assertEqual(msg.get_filename(), '') + + def test_missing_boundary(self): + msg = email.message_from_string("From: foo\n") + self.assertEqual(msg.get_boundary(), None) + + def test_get_params(self): + eq = self.assertEqual + msg = email.message_from_string( + 'X-Header: foo=one; bar=two; baz=three\n') + eq(msg.get_params(header='x-header'), + [('foo', 'one'), ('bar', 'two'), ('baz', 'three')]) + msg = email.message_from_string( + 'X-Header: foo; bar=one; baz=two\n') + eq(msg.get_params(header='x-header'), + [('foo', ''), ('bar', 'one'), ('baz', 'two')]) + eq(msg.get_params(), None) + msg = email.message_from_string( + 'X-Header: foo; bar="one"; baz=two\n') + eq(msg.get_params(header='x-header'), + [('foo', ''), ('bar', 'one'), ('baz', 'two')]) + + def test_get_param_liberal(self): + msg = Message() + msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"' + self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG') + + def test_get_param(self): + eq = self.assertEqual + msg = email.message_from_string( + "X-Header: foo=one; bar=two; baz=three\n") + eq(msg.get_param('bar', header='x-header'), 'two') + eq(msg.get_param('quuz', header='x-header'), None) + eq(msg.get_param('quuz'), None) + msg = email.message_from_string( + 'X-Header: foo; bar="one"; baz=two\n') + eq(msg.get_param('foo', header='x-header'), '') + eq(msg.get_param('bar', header='x-header'), 'one') + eq(msg.get_param('baz', header='x-header'), 'two') + # XXX: We are not RFC-2045 compliant! We cannot parse: + # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"' + # msg.get_param("weird") + # yet. + + def test_get_param_funky_continuation_lines(self): + msg = self._msgobj('msg_22.txt') + self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG') + + def test_get_param_with_semis_in_quotes(self): + msg = email.message_from_string( + 'Content-Type: image/pjpeg; name="Jim&&Jill"\n') + self.assertEqual(msg.get_param('name'), 'Jim&&Jill') + self.assertEqual(msg.get_param('name', unquote=False), + '"Jim&&Jill"') + + def test_has_key(self): + msg = email.message_from_string('Header: exists') + self.assertTrue(msg.has_key('header')) + self.assertTrue(msg.has_key('Header')) + self.assertTrue(msg.has_key('HEADER')) + self.assertFalse(msg.has_key('headeri')) + + def test_set_param(self): + eq = self.assertEqual + msg = Message() + msg.set_param('charset', 'iso-2022-jp') + eq(msg.get_param('charset'), 'iso-2022-jp') + msg.set_param('importance', 'high value') + eq(msg.get_param('importance'), 'high value') + eq(msg.get_param('importance', unquote=False), '"high value"') + eq(msg.get_params(), [('text/plain', ''), + ('charset', 'iso-2022-jp'), + ('importance', 'high value')]) + eq(msg.get_params(unquote=False), [('text/plain', ''), + ('charset', '"iso-2022-jp"'), + ('importance', '"high value"')]) + msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy') + eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx') + + def test_del_param(self): + eq = self.assertEqual + msg = self._msgobj('msg_05.txt') + eq(msg.get_params(), + [('multipart/report', ''), ('report-type', 'delivery-status'), + ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) + old_val = msg.get_param("report-type") + msg.del_param("report-type") + eq(msg.get_params(), + [('multipart/report', ''), + ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) + msg.set_param("report-type", old_val) + eq(msg.get_params(), + [('multipart/report', ''), + ('boundary', 'D1690A7AC1.996856090/mail.example.com'), + ('report-type', old_val)]) + + def test_del_param_on_other_header(self): + msg = Message() + msg.add_header('Content-Disposition', 'attachment', filename='bud.gif') + msg.del_param('filename', 'content-disposition') + self.assertEqual(msg['content-disposition'], 'attachment') + + def test_set_type(self): + eq = self.assertEqual + msg = Message() + self.assertRaises(ValueError, msg.set_type, 'text') + msg.set_type('text/plain') + eq(msg['content-type'], 'text/plain') + msg.set_param('charset', 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + msg.set_type('text/html') + eq(msg['content-type'], 'text/html; charset="us-ascii"') + + def test_set_type_on_other_header(self): + msg = Message() + msg['X-Content-Type'] = 'text/plain' + msg.set_type('application/octet-stream', 'X-Content-Type') + self.assertEqual(msg['x-content-type'], 'application/octet-stream') + + def test_get_content_type_missing(self): + msg = Message() + self.assertEqual(msg.get_content_type(), 'text/plain') + + def test_get_content_type_missing_with_default_type(self): + msg = Message() + msg.set_default_type('message/rfc822') + self.assertEqual(msg.get_content_type(), 'message/rfc822') + + def test_get_content_type_from_message_implicit(self): + msg = self._msgobj('msg_30.txt') + self.assertEqual(msg.get_payload(0).get_content_type(), + 'message/rfc822') + + def test_get_content_type_from_message_explicit(self): + msg = self._msgobj('msg_28.txt') + self.assertEqual(msg.get_payload(0).get_content_type(), + 'message/rfc822') + + def test_get_content_type_from_message_text_plain_implicit(self): + msg = self._msgobj('msg_03.txt') + self.assertEqual(msg.get_content_type(), 'text/plain') + + def test_get_content_type_from_message_text_plain_explicit(self): + msg = self._msgobj('msg_01.txt') + self.assertEqual(msg.get_content_type(), 'text/plain') + + def test_get_content_maintype_missing(self): + msg = Message() + self.assertEqual(msg.get_content_maintype(), 'text') + + def test_get_content_maintype_missing_with_default_type(self): + msg = Message() + msg.set_default_type('message/rfc822') + self.assertEqual(msg.get_content_maintype(), 'message') + + def test_get_content_maintype_from_message_implicit(self): + msg = self._msgobj('msg_30.txt') + self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message') + + def test_get_content_maintype_from_message_explicit(self): + msg = self._msgobj('msg_28.txt') + self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message') + + def test_get_content_maintype_from_message_text_plain_implicit(self): + msg = self._msgobj('msg_03.txt') + self.assertEqual(msg.get_content_maintype(), 'text') + + def test_get_content_maintype_from_message_text_plain_explicit(self): + msg = self._msgobj('msg_01.txt') + self.assertEqual(msg.get_content_maintype(), 'text') + + def test_get_content_subtype_missing(self): + msg = Message() + self.assertEqual(msg.get_content_subtype(), 'plain') + + def test_get_content_subtype_missing_with_default_type(self): + msg = Message() + msg.set_default_type('message/rfc822') + self.assertEqual(msg.get_content_subtype(), 'rfc822') + + def test_get_content_subtype_from_message_implicit(self): + msg = self._msgobj('msg_30.txt') + self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822') + + def test_get_content_subtype_from_message_explicit(self): + msg = self._msgobj('msg_28.txt') + self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822') + + def test_get_content_subtype_from_message_text_plain_implicit(self): + msg = self._msgobj('msg_03.txt') + self.assertEqual(msg.get_content_subtype(), 'plain') + + def test_get_content_subtype_from_message_text_plain_explicit(self): + msg = self._msgobj('msg_01.txt') + self.assertEqual(msg.get_content_subtype(), 'plain') + + def test_get_content_maintype_error(self): + msg = Message() + msg['Content-Type'] = 'no-slash-in-this-string' + self.assertEqual(msg.get_content_maintype(), 'text') + + def test_get_content_subtype_error(self): + msg = Message() + msg['Content-Type'] = 'no-slash-in-this-string' + self.assertEqual(msg.get_content_subtype(), 'plain') + + def test_replace_header(self): + eq = self.assertEqual + msg = Message() + msg.add_header('First', 'One') + msg.add_header('Second', 'Two') + msg.add_header('Third', 'Three') + eq(msg.keys(), ['First', 'Second', 'Third']) + eq(msg.values(), ['One', 'Two', 'Three']) + msg.replace_header('Second', 'Twenty') + eq(msg.keys(), ['First', 'Second', 'Third']) + eq(msg.values(), ['One', 'Twenty', 'Three']) + msg.add_header('First', 'Eleven') + msg.replace_header('First', 'One Hundred') + eq(msg.keys(), ['First', 'Second', 'Third', 'First']) + eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven']) + self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing') + + def test_broken_base64_payload(self): + x = 'AwDp0P7//y6LwKEAcPa/6Q=9' + msg = Message() + msg['content-type'] = 'audio/x-midi' + msg['content-transfer-encoding'] = 'base64' + msg.set_payload(x) + self.assertEqual(msg.get_payload(decode=True), x) + + + +# Test the email.encoders module +class TestEncoders(unittest.TestCase): + def test_encode_empty_payload(self): + eq = self.assertEqual + msg = Message() + msg.set_charset('us-ascii') + eq(msg['content-transfer-encoding'], '7bit') + + def test_default_cte(self): + eq = self.assertEqual + msg = MIMEText('hello world') + eq(msg['content-transfer-encoding'], '7bit') + + def test_default_cte(self): + eq = self.assertEqual + # With no explicit _charset its us-ascii, and all are 7-bit + msg = MIMEText('hello world') + eq(msg['content-transfer-encoding'], '7bit') + # Similar, but with 8-bit data + msg = MIMEText('hello \xf8 world') + eq(msg['content-transfer-encoding'], '8bit') + # And now with a different charset + msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') + eq(msg['content-transfer-encoding'], 'quoted-printable') + + + +# Test long header wrapping +class TestLongHeaders(TestEmailBase): + def test_split_long_continuation(self): + eq = self.ndiffAssertEqual + msg = email.message_from_string("""\ +Subject: bug demonstration +\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 +\tmore text + +test +""") + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), """\ +Subject: bug demonstration + 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 + more text + +test +""") + + def test_another_long_almost_unsplittable_header(self): + eq = self.ndiffAssertEqual + hstr = """\ +bug demonstration +\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 +\tmore text""" + h = Header(hstr, continuation_ws='\t') + eq(h.encode(), """\ +bug demonstration +\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 +\tmore text""") + h = Header(hstr) + eq(h.encode(), """\ +bug demonstration + 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 + more text""") + + def test_long_nonstring(self): + eq = self.ndiffAssertEqual + g = Charset("iso-8859-1") + cz = Charset("iso-8859-2") + utf8 = Charset("utf-8") + g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. " + cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. " + utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8") + h = Header(g_head, g, header_name='Subject') + h.append(cz_head, cz) + h.append(utf8_head, utf8) + msg = Message() + msg['Subject'] = h + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), """\ +Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?= + =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?= + =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?= + =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?= + =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= + =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?= + =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?= + =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?= + =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?= + =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?= + =?utf-8?b?44Gm44GE44G+44GZ44CC?= + +""") + eq(h.encode(), """\ +=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?= + =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?= + =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?= + =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?= + =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= + =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?= + =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?= + =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?= + =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?= + =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?= + =?utf-8?b?44Gm44GE44G+44GZ44CC?=""") + + def test_long_header_encode(self): + eq = self.ndiffAssertEqual + h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' + 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', + header_name='X-Foobar-Spoink-Defrobnit') + eq(h.encode(), '''\ +wasnipoop; giraffes="very-long-necked-animals"; + spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') + + def test_long_header_encode_with_tab_continuation(self): + eq = self.ndiffAssertEqual + h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' + 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', + header_name='X-Foobar-Spoink-Defrobnit', + continuation_ws='\t') + eq(h.encode(), '''\ +wasnipoop; giraffes="very-long-necked-animals"; +\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') + + def test_header_splitter(self): + eq = self.ndiffAssertEqual + msg = MIMEText('') + # It'd be great if we could use add_header() here, but that doesn't + # guarantee an order of the parameters. + msg['X-Foobar-Spoink-Defrobnit'] = ( + 'wasnipoop; giraffes="very-long-necked-animals"; ' + 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"') + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), '''\ +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; + spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" + +''') + + def test_no_semis_header_splitter(self): + eq = self.ndiffAssertEqual + msg = Message() + msg['From'] = 'test@dom.ain' + msg['References'] = SPACE.join(['<%d@dom.ain>' % i for i in range(10)]) + msg.set_payload('Test') + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), """\ +From: test@dom.ain +References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> + <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain> + +Test""") + + def test_no_split_long_header(self): + eq = self.ndiffAssertEqual + hstr = 'References: ' + 'x' * 80 + h = Header(hstr, continuation_ws='\t') + eq(h.encode(), """\ +References: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") + + def test_splitting_multiple_long_lines(self): + eq = self.ndiffAssertEqual + hstr = """\ +from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for ; Sat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for ; Sat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for ; Sat, 2 Feb 2002 17:00:06 -0800 (PST) +""" + h = Header(hstr, continuation_ws='\t') + eq(h.encode(), """\ +from babylon.socal-raves.org (localhost [127.0.0.1]); +\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; +\tfor ; +\tSat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); +\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; +\tfor ; +\tSat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); +\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; +\tfor ; +\tSat, 2 Feb 2002 17:00:06 -0800 (PST)""") + + def test_splitting_first_line_only_is_long(self): + eq = self.ndiffAssertEqual + hstr = """\ +from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) +\tby kronos.mems-exchange.org with esmtp (Exim 4.05) +\tid 17k4h5-00034i-00 +\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""" + h = Header(hstr, maxlinelen=78, header_name='Received', + continuation_ws='\t') + eq(h.encode(), """\ +from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] +\thelo=cthulhu.gerg.ca) +\tby kronos.mems-exchange.org with esmtp (Exim 4.05) +\tid 17k4h5-00034i-00 +\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""") + + def test_long_8bit_header(self): + eq = self.ndiffAssertEqual + msg = Message() + h = Header('Britische Regierung gibt', 'iso-8859-1', + header_name='Subject') + h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') + msg['Subject'] = h + eq(msg.as_string(), """\ +Subject: =?iso-8859-1?q?Britische_Regierung_gibt?= =?iso-8859-1?q?gr=FCnes?= + =?iso-8859-1?q?_Licht_f=FCr_Offshore-Windkraftprojekte?= + +""") + + def test_long_8bit_header_no_charset(self): + eq = self.ndiffAssertEqual + msg = Message() + msg['Reply-To'] = 'Britische Regierung gibt gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte ' + eq(msg.as_string(), """\ +Reply-To: Britische Regierung gibt gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte + +""") + + def test_long_to_header(self): + eq = self.ndiffAssertEqual + to = '"Someone Test #A" ,,"Someone Test #B" , "Someone Test #C" , "Someone Test #D" ' + msg = Message() + msg['To'] = to + eq(msg.as_string(0), '''\ +To: "Someone Test #A" , , + "Someone Test #B" , + "Someone Test #C" , + "Someone Test #D" + +''') + + def test_long_line_after_append(self): + eq = self.ndiffAssertEqual + s = 'This is an example of string which has almost the limit of header length.' + h = Header(s) + h.append('Add another line.') + eq(h.encode(), """\ +This is an example of string which has almost the limit of header length. + Add another line.""") + + def test_shorter_line_with_append(self): + eq = self.ndiffAssertEqual + s = 'This is a shorter line.' + h = Header(s) + h.append('Add another sentence. (Surprise?)') + eq(h.encode(), + 'This is a shorter line. Add another sentence. (Surprise?)') + + def test_long_field_name(self): + eq = self.ndiffAssertEqual + fn = 'X-Very-Very-Very-Long-Header-Name' + gs = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. " + h = Header(gs, 'iso-8859-1', header_name=fn) + # BAW: this seems broken because the first line is too long + eq(h.encode(), """\ +=?iso-8859-1?q?Die_Mieter_treten_hier_?= + =?iso-8859-1?q?ein_werden_mit_einem_Foerderband_komfortabel_den_Korridor_?= + =?iso-8859-1?q?entlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_g?= + =?iso-8859-1?q?egen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") + + def test_long_received_header(self): + h = 'from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; Wed, 05 Mar 2003 18:10:18 -0700' + msg = Message() + msg['Received-1'] = Header(h, continuation_ws='\t') + msg['Received-2'] = h + self.ndiffAssertEqual(msg.as_string(), """\ +Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by +\throthgar.la.mastaler.com (tmda-ofmipd) with ESMTP; +\tWed, 05 Mar 2003 18:10:18 -0700 +Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by + hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; + Wed, 05 Mar 2003 18:10:18 -0700 + +""") + + def test_string_headerinst_eq(self): + h = '<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner\'s message of "Thu, 6 Mar 2003 13:58:21 +0100")' + msg = Message() + msg['Received'] = Header(h, header_name='Received-1', + continuation_ws='\t') + msg['Received'] = h + self.ndiffAssertEqual(msg.as_string(), """\ +Received: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> +\t(David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100") +Received: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> + (David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100") + +""") + + def test_long_unbreakable_lines_with_continuation(self): + eq = self.ndiffAssertEqual + msg = Message() + t = """\ + iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 + locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp""" + msg['Face-1'] = t + msg['Face-2'] = Header(t, header_name='Face-2') + eq(msg.as_string(), """\ +Face-1: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 + locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp +Face-2: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 + locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp + +""") + + def test_another_long_multiline_header(self): + eq = self.ndiffAssertEqual + m = '''\ +Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905); + Wed, 16 Oct 2002 07:41:11 -0700''' + msg = email.message_from_string(m) + eq(msg.as_string(), '''\ +Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with + Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700 + +''') + + def test_long_lines_with_different_header(self): + eq = self.ndiffAssertEqual + h = """\ +List-Unsubscribe: , + """ + msg = Message() + msg['List'] = h + msg['List'] = Header(h, header_name='List') + self.ndiffAssertEqual(msg.as_string(), """\ +List: List-Unsubscribe: , + +List: List-Unsubscribe: , + + +""") + + + +# Test mangling of "From " lines in the body of a message +class TestFromMangling(unittest.TestCase): + def setUp(self): + self.msg = Message() + self.msg['From'] = 'aaa@bbb.org' + self.msg.set_payload("""\ +From the desk of A.A.A.: +Blah blah blah +""") + + def test_mangled_from(self): + s = StringIO() + g = Generator(s, mangle_from_=True) + g.flatten(self.msg) + self.assertEqual(s.getvalue(), """\ +From: aaa@bbb.org + +>From the desk of A.A.A.: +Blah blah blah +""") + + def test_dont_mangle_from(self): + s = StringIO() + g = Generator(s, mangle_from_=False) + g.flatten(self.msg) + self.assertEqual(s.getvalue(), """\ +From: aaa@bbb.org + +From the desk of A.A.A.: +Blah blah blah +""") + + + +# Test the basic MIMEAudio class +class TestMIMEAudio(unittest.TestCase): + def setUp(self): + # Make sure we pick up the audiotest.au that lives in email/test/data. + # In Python, there's an audiotest.au living in Lib/test but that isn't + # included in some binary distros that don't include the test + # package. The trailing empty string on the .join() is significant + # since findfile() will do a dirname(). + datadir = os.path.join(os.path.dirname(landmark), 'data', '') + fp = open(findfile('audiotest.au', datadir), 'rb') + try: + self._audiodata = fp.read() + finally: + fp.close() + self._au = MIMEAudio(self._audiodata) + + def test_guess_minor_type(self): + self.assertEqual(self._au.get_content_type(), 'audio/basic') + + def test_encoding(self): + payload = self._au.get_payload() + self.assertEqual(base64.decodestring(payload), self._audiodata) + + def test_checkSetMinor(self): + au = MIMEAudio(self._audiodata, 'fish') + self.assertEqual(au.get_content_type(), 'audio/fish') + + def test_add_header(self): + eq = self.assertEqual + self._au.add_header('Content-Disposition', 'attachment', + filename='audiotest.au') + eq(self._au['content-disposition'], + 'attachment; filename="audiotest.au"') + eq(self._au.get_params(header='content-disposition'), + [('attachment', ''), ('filename', 'audiotest.au')]) + eq(self._au.get_param('filename', header='content-disposition'), + 'audiotest.au') + missing = [] + eq(self._au.get_param('attachment', header='content-disposition'), '') + self.assertIs(self._au.get_param('foo', failobj=missing, + header='content-disposition'), + missing) + # Try some missing stuff + self.assertIs(self._au.get_param('foobar', missing), missing) + self.assertIs(self._au.get_param('attachment', missing, + header='foobar'), missing) + + + +# Test the basic MIMEImage class +class TestMIMEImage(unittest.TestCase): + def setUp(self): + fp = openfile('PyBanner048.gif') + try: + self._imgdata = fp.read() + finally: + fp.close() + self._im = MIMEImage(self._imgdata) + + def test_guess_minor_type(self): + self.assertEqual(self._im.get_content_type(), 'image/gif') + + def test_encoding(self): + payload = self._im.get_payload() + self.assertEqual(base64.decodestring(payload), self._imgdata) + + def test_checkSetMinor(self): + im = MIMEImage(self._imgdata, 'fish') + self.assertEqual(im.get_content_type(), 'image/fish') + + def test_add_header(self): + eq = self.assertEqual + self._im.add_header('Content-Disposition', 'attachment', + filename='dingusfish.gif') + eq(self._im['content-disposition'], + 'attachment; filename="dingusfish.gif"') + eq(self._im.get_params(header='content-disposition'), + [('attachment', ''), ('filename', 'dingusfish.gif')]) + eq(self._im.get_param('filename', header='content-disposition'), + 'dingusfish.gif') + missing = [] + eq(self._im.get_param('attachment', header='content-disposition'), '') + self.assertIs(self._im.get_param('foo', failobj=missing, + header='content-disposition'), + missing) + # Try some missing stuff + self.assertIs(self._im.get_param('foobar', missing), missing) + self.assertIs(self._im.get_param('attachment', missing, + header='foobar'), missing) + + + +# Test the basic MIMEApplication class +class TestMIMEApplication(unittest.TestCase): + def test_headers(self): + eq = self.assertEqual + msg = MIMEApplication('\xfa\xfb\xfc\xfd\xfe\xff') + eq(msg.get_content_type(), 'application/octet-stream') + eq(msg['content-transfer-encoding'], 'base64') + + def test_body(self): + eq = self.assertEqual + bytes = '\xfa\xfb\xfc\xfd\xfe\xff' + msg = MIMEApplication(bytes) + eq(msg.get_payload(), '+vv8/f7/') + eq(msg.get_payload(decode=True), bytes) + + def test_binary_body_with_encode_7or8bit(self): + # Issue 17171. + bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' + msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit) + # Treated as a string, this will be invalid code points. + self.assertEqual(msg.get_payload(), bytesdata) + self.assertEqual(msg.get_payload(decode=True), bytesdata) + self.assertEqual(msg['Content-Transfer-Encoding'], '8bit') + s = StringIO() + g = Generator(s) + g.flatten(msg) + wireform = s.getvalue() + msg2 = email.message_from_string(wireform) + self.assertEqual(msg.get_payload(), bytesdata) + self.assertEqual(msg2.get_payload(decode=True), bytesdata) + self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit') + + def test_binary_body_with_encode_noop(self): + # Issue 16564: This does not produce an RFC valid message, since to be + # valid it should have a CTE of binary. But the below works, and is + # documented as working this way. + bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' + msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop) + self.assertEqual(msg.get_payload(), bytesdata) + self.assertEqual(msg.get_payload(decode=True), bytesdata) + s = StringIO() + g = Generator(s) + g.flatten(msg) + wireform = s.getvalue() + msg2 = email.message_from_string(wireform) + self.assertEqual(msg.get_payload(), bytesdata) + self.assertEqual(msg2.get_payload(decode=True), bytesdata) + + +# Test the basic MIMEText class +class TestMIMEText(unittest.TestCase): + def setUp(self): + self._msg = MIMEText('hello there') + + def test_types(self): + eq = self.assertEqual + eq(self._msg.get_content_type(), 'text/plain') + eq(self._msg.get_param('charset'), 'us-ascii') + missing = [] + self.assertIs(self._msg.get_param('foobar', missing), missing) + self.assertIs(self._msg.get_param('charset', missing, header='foobar'), + missing) + + def test_payload(self): + self.assertEqual(self._msg.get_payload(), 'hello there') + self.assertFalse(self._msg.is_multipart()) + + def test_charset(self): + eq = self.assertEqual + msg = MIMEText('hello there', _charset='us-ascii') + eq(msg.get_charset().input_charset, 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + + + +# Test complicated multipart/* messages +class TestMultipart(TestEmailBase): + def setUp(self): + fp = openfile('PyBanner048.gif') + try: + data = fp.read() + finally: + fp.close() + + container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY') + image = MIMEImage(data, name='dingusfish.gif') + image.add_header('content-disposition', 'attachment', + filename='dingusfish.gif') + intro = MIMEText('''\ +Hi there, + +This is the dingus fish. +''') + container.attach(intro) + container.attach(image) + container['From'] = 'Barry ' + container['To'] = 'Dingus Lovers ' + container['Subject'] = 'Here is your dingus fish' + + now = 987809702.54848599 + timetuple = time.localtime(now) + if timetuple[-1] == 0: + tzsecs = time.timezone + else: + tzsecs = time.altzone + if tzsecs > 0: + sign = '-' + else: + sign = '+' + tzoffset = ' %s%04d' % (sign, tzsecs // 36) + container['Date'] = time.strftime( + '%a, %d %b %Y %H:%M:%S', + time.localtime(now)) + tzoffset + self._msg = container + self._im = image + self._txt = intro + + def test_hierarchy(self): + # convenience + eq = self.assertEqual + raises = self.assertRaises + # tests + m = self._msg + self.assertTrue(m.is_multipart()) + eq(m.get_content_type(), 'multipart/mixed') + eq(len(m.get_payload()), 2) + raises(IndexError, m.get_payload, 2) + m0 = m.get_payload(0) + m1 = m.get_payload(1) + self.assertIs(m0, self._txt) + self.assertIs(m1, self._im) + eq(m.get_payload(), [m0, m1]) + self.assertFalse(m0.is_multipart()) + self.assertFalse(m1.is_multipart()) + + def test_empty_multipart_idempotent(self): + text = """\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + + +--BOUNDARY + + +--BOUNDARY-- +""" + msg = Parser().parsestr(text) + self.ndiffAssertEqual(text, msg.as_string()) + + def test_no_parts_in_a_multipart_with_none_epilogue(self): + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.set_boundary('BOUNDARY') + self.ndiffAssertEqual(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY + +--BOUNDARY-- +''') + + def test_no_parts_in_a_multipart_with_empty_epilogue(self): + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = '' + outer.epilogue = '' + outer.set_boundary('BOUNDARY') + self.ndiffAssertEqual(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + + +--BOUNDARY + +--BOUNDARY-- +''') + + def test_one_part_in_a_multipart(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.set_boundary('BOUNDARY') + msg = MIMEText('hello world') + outer.attach(msg) + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- +''') + + def test_seq_parts_in_a_multipart_with_empty_preamble(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = '' + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- +''') + + + def test_seq_parts_in_a_multipart_with_none_preamble(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = None + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- +''') + + + def test_seq_parts_in_a_multipart_with_none_epilogue(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.epilogue = None + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- +''') + + + def test_seq_parts_in_a_multipart_with_empty_epilogue(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.epilogue = '' + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- +''') + + + def test_seq_parts_in_a_multipart_with_nl_epilogue(self): + eq = self.ndiffAssertEqual + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.epilogue = '\n' + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + eq(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world +--BOUNDARY-- + +''') + + def test_message_external_body(self): + eq = self.assertEqual + msg = self._msgobj('msg_36.txt') + eq(len(msg.get_payload()), 2) + msg1 = msg.get_payload(1) + eq(msg1.get_content_type(), 'multipart/alternative') + eq(len(msg1.get_payload()), 2) + for subpart in msg1.get_payload(): + eq(subpart.get_content_type(), 'message/external-body') + eq(len(subpart.get_payload()), 1) + subsubpart = subpart.get_payload(0) + eq(subsubpart.get_content_type(), 'text/plain') + + def test_double_boundary(self): + # msg_37.txt is a multipart that contains two dash-boundary's in a + # row. Our interpretation of RFC 2046 calls for ignoring the second + # and subsequent boundaries. + msg = self._msgobj('msg_37.txt') + self.assertEqual(len(msg.get_payload()), 3) + + def test_nested_inner_contains_outer_boundary(self): + eq = self.ndiffAssertEqual + # msg_38.txt has an inner part that contains outer boundaries. My + # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say + # these are illegal and should be interpreted as unterminated inner + # parts. + msg = self._msgobj('msg_38.txt') + sfp = StringIO() + iterators._structure(msg, sfp) + eq(sfp.getvalue(), """\ +multipart/mixed + multipart/mixed + multipart/alternative + text/plain + text/plain + text/plain + text/plain +""") + + def test_nested_with_same_boundary(self): + eq = self.ndiffAssertEqual + # msg 39.txt is similarly evil in that it's got inner parts that use + # the same boundary as outer parts. Again, I believe the way this is + # parsed is closest to the spirit of RFC 2046 + msg = self._msgobj('msg_39.txt') + sfp = StringIO() + iterators._structure(msg, sfp) + eq(sfp.getvalue(), """\ +multipart/mixed + multipart/mixed + multipart/alternative + application/octet-stream + application/octet-stream + text/plain +""") + + def test_boundary_in_non_multipart(self): + msg = self._msgobj('msg_40.txt') + self.assertEqual(msg.as_string(), '''\ +MIME-Version: 1.0 +Content-Type: text/html; boundary="--961284236552522269" + +----961284236552522269 +Content-Type: text/html; +Content-Transfer-Encoding: 7Bit + + + +----961284236552522269-- +''') + + def test_boundary_with_leading_space(self): + eq = self.assertEqual + msg = email.message_from_string('''\ +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary=" XXXX" + +-- XXXX +Content-Type: text/plain + + +-- XXXX +Content-Type: text/plain + +-- XXXX-- +''') + self.assertTrue(msg.is_multipart()) + eq(msg.get_boundary(), ' XXXX') + eq(len(msg.get_payload()), 2) + + def test_boundary_without_trailing_newline(self): + m = Parser().parsestr("""\ +Content-Type: multipart/mixed; boundary="===============0012394164==" +MIME-Version: 1.0 + +--===============0012394164== +Content-Type: image/file1.jpg +MIME-Version: 1.0 +Content-Transfer-Encoding: base64 + +YXNkZg== +--===============0012394164==--""") + self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==') + + + +# Test some badly formatted messages +class TestNonConformant(TestEmailBase): + def test_parse_missing_minor_type(self): + eq = self.assertEqual + msg = self._msgobj('msg_14.txt') + eq(msg.get_content_type(), 'text/plain') + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') + + def test_same_boundary_inner_outer(self): + msg = self._msgobj('msg_15.txt') + # XXX We can probably eventually do better + inner = msg.get_payload(0) + self.assertTrue(hasattr(inner, 'defects')) + self.assertEqual(len(inner.defects), 1) + self.assertIsInstance(inner.defects[0], + errors.StartBoundaryNotFoundDefect) + + def test_multipart_no_boundary(self): + msg = self._msgobj('msg_25.txt') + self.assertIsInstance(msg.get_payload(), str) + self.assertEqual(len(msg.defects), 2) + self.assertIsInstance(msg.defects[0], + errors.NoBoundaryInMultipartDefect) + self.assertIsInstance(msg.defects[1], + errors.MultipartInvariantViolationDefect) + + def test_invalid_content_type(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + msg = Message() + # RFC 2045, $5.2 says invalid yields text/plain + msg['Content-Type'] = 'text' + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') + eq(msg.get_content_type(), 'text/plain') + # Clear the old value and try something /really/ invalid + del msg['content-type'] + msg['Content-Type'] = 'foo' + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') + eq(msg.get_content_type(), 'text/plain') + # Still, make sure that the message is idempotently generated + s = StringIO() + g = Generator(s) + g.flatten(msg) + neq(s.getvalue(), 'Content-Type: foo\n\n') + + def test_no_start_boundary(self): + eq = self.ndiffAssertEqual + msg = self._msgobj('msg_31.txt') + eq(msg.get_payload(), """\ +--BOUNDARY +Content-Type: text/plain + +message 1 + +--BOUNDARY +Content-Type: text/plain + +message 2 + +--BOUNDARY-- +""") + + def test_no_separating_blank_line(self): + eq = self.ndiffAssertEqual + msg = self._msgobj('msg_35.txt') + eq(msg.as_string(), """\ +From: aperson@dom.ain +To: bperson@dom.ain +Subject: here's something interesting + +counter to RFC 2822, there's no separating newline here +""") + + def test_lying_multipart(self): + msg = self._msgobj('msg_41.txt') + self.assertTrue(hasattr(msg, 'defects')) + self.assertEqual(len(msg.defects), 2) + self.assertIsInstance(msg.defects[0], + errors.NoBoundaryInMultipartDefect) + self.assertIsInstance(msg.defects[1], + errors.MultipartInvariantViolationDefect) + + def test_missing_start_boundary(self): + outer = self._msgobj('msg_42.txt') + # The message structure is: + # + # multipart/mixed + # text/plain + # message/rfc822 + # multipart/mixed [*] + # + # [*] This message is missing its start boundary + bad = outer.get_payload(1).get_payload(0) + self.assertEqual(len(bad.defects), 1) + self.assertIsInstance(bad.defects[0], + errors.StartBoundaryNotFoundDefect) + + def test_first_line_is_continuation_header(self): + eq = self.assertEqual + m = ' Line 1\nLine 2\nLine 3' + msg = email.message_from_string(m) + eq(msg.keys(), []) + eq(msg.get_payload(), 'Line 2\nLine 3') + eq(len(msg.defects), 1) + self.assertIsInstance(msg.defects[0], + errors.FirstHeaderLineIsContinuationDefect) + eq(msg.defects[0].line, ' Line 1\n') + + + +# Test RFC 2047 header encoding and decoding +class TestRFC2047(unittest.TestCase): + def test_rfc2047_multiline(self): + eq = self.assertEqual + s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz + foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" + dh = decode_header(s) + eq(dh, [ + ('Re:', None), + ('r\x8aksm\x9arg\x8cs', 'mac-iceland'), + ('baz foo bar', None), + ('r\x8aksm\x9arg\x8cs', 'mac-iceland')]) + eq(str(make_header(dh)), + """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar + =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""") + + def test_whitespace_eater_unicode(self): + eq = self.assertEqual + s = '=?ISO-8859-1?Q?Andr=E9?= Pirard ' + dh = decode_header(s) + eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard ', None)]) + hu = unicode(make_header(dh)).encode('latin-1') + eq(hu, 'Andr\xe9 Pirard ') + + def test_whitespace_eater_unicode_2(self): + eq = self.assertEqual + s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=' + dh = decode_header(s) + eq(dh, [('The', None), ('quick brown fox', 'iso-8859-1'), + ('jumped over the', None), ('lazy dog', 'iso-8859-1')]) + hu = make_header(dh).__unicode__() + eq(hu, u'The quick brown fox jumped over the lazy dog') + + def test_rfc2047_missing_whitespace(self): + s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' + dh = decode_header(s) + self.assertEqual(dh, [(s, None)]) + + def test_rfc2047_with_whitespace(self): + s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' + dh = decode_header(s) + self.assertEqual(dh, [('Sm', None), ('\xf6', 'iso-8859-1'), + ('rg', None), ('\xe5', 'iso-8859-1'), + ('sbord', None)]) + + + +# Test the MIMEMessage class +class TestMIMEMessage(TestEmailBase): + def setUp(self): + fp = openfile('msg_11.txt') + try: + self._text = fp.read() + finally: + fp.close() + + def test_type_error(self): + self.assertRaises(TypeError, MIMEMessage, 'a plain string') + + def test_valid_argument(self): + eq = self.assertEqual + subject = 'A sub-message' + m = Message() + m['Subject'] = subject + r = MIMEMessage(m) + eq(r.get_content_type(), 'message/rfc822') + payload = r.get_payload() + self.assertIsInstance(payload, list) + eq(len(payload), 1) + subpart = payload[0] + self.assertIs(subpart, m) + eq(subpart['subject'], subject) + + def test_bad_multipart(self): + eq = self.assertEqual + msg1 = Message() + msg1['Subject'] = 'subpart 1' + msg2 = Message() + msg2['Subject'] = 'subpart 2' + r = MIMEMessage(msg1) + self.assertRaises(errors.MultipartConversionError, r.attach, msg2) + + def test_generate(self): + # First craft the message to be encapsulated + m = Message() + m['Subject'] = 'An enclosed message' + m.set_payload('Here is the body of the message.\n') + r = MIMEMessage(m) + r['Subject'] = 'The enclosing message' + s = StringIO() + g = Generator(s) + g.flatten(r) + self.assertEqual(s.getvalue(), """\ +Content-Type: message/rfc822 +MIME-Version: 1.0 +Subject: The enclosing message + +Subject: An enclosed message + +Here is the body of the message. +""") + + def test_parse_message_rfc822(self): + eq = self.assertEqual + msg = self._msgobj('msg_11.txt') + eq(msg.get_content_type(), 'message/rfc822') + payload = msg.get_payload() + self.assertIsInstance(payload, list) + eq(len(payload), 1) + submsg = payload[0] + self.assertIsInstance(submsg, Message) + eq(submsg['subject'], 'An enclosed message') + eq(submsg.get_payload(), 'Here is the body of the message.\n') + + def test_dsn(self): + eq = self.assertEqual + # msg 16 is a Delivery Status Notification, see RFC 1894 + msg = self._msgobj('msg_16.txt') + eq(msg.get_content_type(), 'multipart/report') + self.assertTrue(msg.is_multipart()) + eq(len(msg.get_payload()), 3) + # Subpart 1 is a text/plain, human readable section + subpart = msg.get_payload(0) + eq(subpart.get_content_type(), 'text/plain') + eq(subpart.get_payload(), """\ +This report relates to a message you sent with the following header fields: + + Message-id: <002001c144a6$8752e060$56104586@oxy.edu> + Date: Sun, 23 Sep 2001 20:10:55 -0700 + From: "Ian T. Henry" + To: SoCal Raves + Subject: [scr] yeah for Ians!! + +Your message cannot be delivered to the following recipients: + + Recipient address: jangel1@cougar.noc.ucla.edu + Reason: recipient reached disk quota + +""") + # Subpart 2 contains the machine parsable DSN information. It + # consists of two blocks of headers, represented by two nested Message + # objects. + subpart = msg.get_payload(1) + eq(subpart.get_content_type(), 'message/delivery-status') + eq(len(subpart.get_payload()), 2) + # message/delivery-status should treat each block as a bunch of + # headers, i.e. a bunch of Message objects. + dsn1 = subpart.get_payload(0) + self.assertIsInstance(dsn1, Message) + eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu') + eq(dsn1.get_param('dns', header='reporting-mta'), '') + # Try a missing one + eq(dsn1.get_param('nsd', header='reporting-mta'), None) + dsn2 = subpart.get_payload(1) + self.assertIsInstance(dsn2, Message) + eq(dsn2['action'], 'failed') + eq(dsn2.get_params(header='original-recipient'), + [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')]) + eq(dsn2.get_param('rfc822', header='final-recipient'), '') + # Subpart 3 is the original message + subpart = msg.get_payload(2) + eq(subpart.get_content_type(), 'message/rfc822') + payload = subpart.get_payload() + self.assertIsInstance(payload, list) + eq(len(payload), 1) + subsubpart = payload[0] + self.assertIsInstance(subsubpart, Message) + eq(subsubpart.get_content_type(), 'text/plain') + eq(subsubpart['message-id'], + '<002001c144a6$8752e060$56104586@oxy.edu>') + + def test_epilogue(self): + eq = self.ndiffAssertEqual + fp = openfile('msg_21.txt') + try: + text = fp.read() + finally: + fp.close() + msg = Message() + msg['From'] = 'aperson@dom.ain' + msg['To'] = 'bperson@dom.ain' + msg['Subject'] = 'Test' + msg.preamble = 'MIME message' + msg.epilogue = 'End of MIME message\n' + msg1 = MIMEText('One') + msg2 = MIMEText('Two') + msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') + msg.attach(msg1) + msg.attach(msg2) + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), text) + + def test_no_nl_preamble(self): + eq = self.ndiffAssertEqual + msg = Message() + msg['From'] = 'aperson@dom.ain' + msg['To'] = 'bperson@dom.ain' + msg['Subject'] = 'Test' + msg.preamble = 'MIME message' + msg.epilogue = '' + msg1 = MIMEText('One') + msg2 = MIMEText('Two') + msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') + msg.attach(msg1) + msg.attach(msg2) + eq(msg.as_string(), """\ +From: aperson@dom.ain +To: bperson@dom.ain +Subject: Test +Content-Type: multipart/mixed; boundary="BOUNDARY" + +MIME message +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +One +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +Two +--BOUNDARY-- +""") + + def test_default_type(self): + eq = self.assertEqual + fp = openfile('msg_30.txt') + try: + msg = email.message_from_file(fp) + finally: + fp.close() + container1 = msg.get_payload(0) + eq(container1.get_default_type(), 'message/rfc822') + eq(container1.get_content_type(), 'message/rfc822') + container2 = msg.get_payload(1) + eq(container2.get_default_type(), 'message/rfc822') + eq(container2.get_content_type(), 'message/rfc822') + container1a = container1.get_payload(0) + eq(container1a.get_default_type(), 'text/plain') + eq(container1a.get_content_type(), 'text/plain') + container2a = container2.get_payload(0) + eq(container2a.get_default_type(), 'text/plain') + eq(container2a.get_content_type(), 'text/plain') + + def test_default_type_with_explicit_container_type(self): + eq = self.assertEqual + fp = openfile('msg_28.txt') + try: + msg = email.message_from_file(fp) + finally: + fp.close() + container1 = msg.get_payload(0) + eq(container1.get_default_type(), 'message/rfc822') + eq(container1.get_content_type(), 'message/rfc822') + container2 = msg.get_payload(1) + eq(container2.get_default_type(), 'message/rfc822') + eq(container2.get_content_type(), 'message/rfc822') + container1a = container1.get_payload(0) + eq(container1a.get_default_type(), 'text/plain') + eq(container1a.get_content_type(), 'text/plain') + container2a = container2.get_payload(0) + eq(container2a.get_default_type(), 'text/plain') + eq(container2a.get_content_type(), 'text/plain') + + def test_default_type_non_parsed(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + # Set up container + container = MIMEMultipart('digest', 'BOUNDARY') + container.epilogue = '' + # Set up subparts + subpart1a = MIMEText('message 1\n') + subpart2a = MIMEText('message 2\n') + subpart1 = MIMEMessage(subpart1a) + subpart2 = MIMEMessage(subpart2a) + container.attach(subpart1) + container.attach(subpart2) + eq(subpart1.get_content_type(), 'message/rfc822') + eq(subpart1.get_default_type(), 'message/rfc822') + eq(subpart2.get_content_type(), 'message/rfc822') + eq(subpart2.get_default_type(), 'message/rfc822') + neq(container.as_string(0), '''\ +Content-Type: multipart/digest; boundary="BOUNDARY" +MIME-Version: 1.0 + +--BOUNDARY +Content-Type: message/rfc822 +MIME-Version: 1.0 + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 1 + +--BOUNDARY +Content-Type: message/rfc822 +MIME-Version: 1.0 + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 2 + +--BOUNDARY-- +''') + del subpart1['content-type'] + del subpart1['mime-version'] + del subpart2['content-type'] + del subpart2['mime-version'] + eq(subpart1.get_content_type(), 'message/rfc822') + eq(subpart1.get_default_type(), 'message/rfc822') + eq(subpart2.get_content_type(), 'message/rfc822') + eq(subpart2.get_default_type(), 'message/rfc822') + neq(container.as_string(0), '''\ +Content-Type: multipart/digest; boundary="BOUNDARY" +MIME-Version: 1.0 + +--BOUNDARY + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 1 + +--BOUNDARY + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 2 + +--BOUNDARY-- +''') + + def test_mime_attachments_in_constructor(self): + eq = self.assertEqual + text1 = MIMEText('') + text2 = MIMEText('') + msg = MIMEMultipart(_subparts=(text1, text2)) + eq(len(msg.get_payload()), 2) + eq(msg.get_payload(0), text1) + eq(msg.get_payload(1), text2) + + + +# A general test of parser->model->generator idempotency. IOW, read a message +# in, parse it into a message object tree, then without touching the tree, +# regenerate the plain text. The original text and the transformed text +# should be identical. Note: that we ignore the Unix-From since that may +# contain a changed date. +class TestIdempotent(TestEmailBase): + def _msgobj(self, filename): + fp = openfile(filename) + try: + data = fp.read() + finally: + fp.close() + msg = email.message_from_string(data) + return msg, data + + def _idempotent(self, msg, text): + eq = self.ndiffAssertEqual + s = StringIO() + g = Generator(s, maxheaderlen=0) + g.flatten(msg) + eq(text, s.getvalue()) + + def test_parse_text_message(self): + eq = self.assertEqual + msg, text = self._msgobj('msg_01.txt') + eq(msg.get_content_type(), 'text/plain') + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') + eq(msg.get_params()[1], ('charset', 'us-ascii')) + eq(msg.get_param('charset'), 'us-ascii') + eq(msg.preamble, None) + eq(msg.epilogue, None) + self._idempotent(msg, text) + + def test_parse_untyped_message(self): + eq = self.assertEqual + msg, text = self._msgobj('msg_03.txt') + eq(msg.get_content_type(), 'text/plain') + eq(msg.get_params(), None) + eq(msg.get_param('charset'), None) + self._idempotent(msg, text) + + def test_simple_multipart(self): + msg, text = self._msgobj('msg_04.txt') + self._idempotent(msg, text) + + def test_MIME_digest(self): + msg, text = self._msgobj('msg_02.txt') + self._idempotent(msg, text) + + def test_long_header(self): + msg, text = self._msgobj('msg_27.txt') + self._idempotent(msg, text) + + def test_MIME_digest_with_part_headers(self): + msg, text = self._msgobj('msg_28.txt') + self._idempotent(msg, text) + + def test_mixed_with_image(self): + msg, text = self._msgobj('msg_06.txt') + self._idempotent(msg, text) + + def test_multipart_report(self): + msg, text = self._msgobj('msg_05.txt') + self._idempotent(msg, text) + + def test_dsn(self): + msg, text = self._msgobj('msg_16.txt') + self._idempotent(msg, text) + + def test_preamble_epilogue(self): + msg, text = self._msgobj('msg_21.txt') + self._idempotent(msg, text) + + def test_multipart_one_part(self): + msg, text = self._msgobj('msg_23.txt') + self._idempotent(msg, text) + + def test_multipart_no_parts(self): + msg, text = self._msgobj('msg_24.txt') + self._idempotent(msg, text) + + def test_no_start_boundary(self): + msg, text = self._msgobj('msg_31.txt') + self._idempotent(msg, text) + + def test_rfc2231_charset(self): + msg, text = self._msgobj('msg_32.txt') + self._idempotent(msg, text) + + def test_more_rfc2231_parameters(self): + msg, text = self._msgobj('msg_33.txt') + self._idempotent(msg, text) + + def test_text_plain_in_a_multipart_digest(self): + msg, text = self._msgobj('msg_34.txt') + self._idempotent(msg, text) + + def test_nested_multipart_mixeds(self): + msg, text = self._msgobj('msg_12a.txt') + self._idempotent(msg, text) + + def test_message_external_body_idempotent(self): + msg, text = self._msgobj('msg_36.txt') + self._idempotent(msg, text) + + def test_content_type(self): + eq = self.assertEqual + # Get a message object and reset the seek pointer for other tests + msg, text = self._msgobj('msg_05.txt') + eq(msg.get_content_type(), 'multipart/report') + # Test the Content-Type: parameters + params = {} + for pk, pv in msg.get_params(): + params[pk] = pv + eq(params['report-type'], 'delivery-status') + eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com') + eq(msg.preamble, 'This is a MIME-encapsulated message.\n') + eq(msg.epilogue, '\n') + eq(len(msg.get_payload()), 3) + # Make sure the subparts are what we expect + msg1 = msg.get_payload(0) + eq(msg1.get_content_type(), 'text/plain') + eq(msg1.get_payload(), 'Yadda yadda yadda\n') + msg2 = msg.get_payload(1) + eq(msg2.get_content_type(), 'text/plain') + eq(msg2.get_payload(), 'Yadda yadda yadda\n') + msg3 = msg.get_payload(2) + eq(msg3.get_content_type(), 'message/rfc822') + self.assertIsInstance(msg3, Message) + payload = msg3.get_payload() + self.assertIsInstance(payload, list) + eq(len(payload), 1) + msg4 = payload[0] + self.assertIsInstance(msg4, Message) + eq(msg4.get_payload(), 'Yadda yadda yadda\n') + + def test_parser(self): + eq = self.assertEqual + msg, text = self._msgobj('msg_06.txt') + # Check some of the outer headers + eq(msg.get_content_type(), 'message/rfc822') + # Make sure the payload is a list of exactly one sub-Message, and that + # that submessage has a type of text/plain + payload = msg.get_payload() + self.assertIsInstance(payload, list) + eq(len(payload), 1) + msg1 = payload[0] + self.assertIsInstance(msg1, Message) + eq(msg1.get_content_type(), 'text/plain') + self.assertIsInstance(msg1.get_payload(), str) + eq(msg1.get_payload(), '\n') + + + +# Test various other bits of the package's functionality +class TestMiscellaneous(TestEmailBase): + def test_message_from_string(self): + fp = openfile('msg_01.txt') + try: + text = fp.read() + finally: + fp.close() + msg = email.message_from_string(text) + s = StringIO() + # Don't wrap/continue long headers since we're trying to test + # idempotency. + g = Generator(s, maxheaderlen=0) + g.flatten(msg) + self.assertEqual(text, s.getvalue()) + + def test_message_from_file(self): + fp = openfile('msg_01.txt') + try: + text = fp.read() + fp.seek(0) + msg = email.message_from_file(fp) + s = StringIO() + # Don't wrap/continue long headers since we're trying to test + # idempotency. + g = Generator(s, maxheaderlen=0) + g.flatten(msg) + self.assertEqual(text, s.getvalue()) + finally: + fp.close() + + def test_message_from_string_with_class(self): + fp = openfile('msg_01.txt') + try: + text = fp.read() + finally: + fp.close() + # Create a subclass + class MyMessage(Message): + pass + + msg = email.message_from_string(text, MyMessage) + self.assertIsInstance(msg, MyMessage) + # Try something more complicated + fp = openfile('msg_02.txt') + try: + text = fp.read() + finally: + fp.close() + msg = email.message_from_string(text, MyMessage) + for subpart in msg.walk(): + self.assertIsInstance(subpart, MyMessage) + + def test_message_from_file_with_class(self): + # Create a subclass + class MyMessage(Message): + pass + + fp = openfile('msg_01.txt') + try: + msg = email.message_from_file(fp, MyMessage) + finally: + fp.close() + self.assertIsInstance(msg, MyMessage) + # Try something more complicated + fp = openfile('msg_02.txt') + try: + msg = email.message_from_file(fp, MyMessage) + finally: + fp.close() + for subpart in msg.walk(): + self.assertIsInstance(subpart, MyMessage) + + def test__all__(self): + module = __import__('email') + # Can't use sorted() here due to Python 2.3 compatibility + all = module.__all__[:] + all.sort() + self.assertEqual(all, [ + # Old names + 'Charset', 'Encoders', 'Errors', 'Generator', + 'Header', 'Iterators', 'MIMEAudio', 'MIMEBase', + 'MIMEImage', 'MIMEMessage', 'MIMEMultipart', + 'MIMENonMultipart', 'MIMEText', 'Message', + 'Parser', 'Utils', 'base64MIME', + # new names + 'base64mime', 'charset', 'encoders', 'errors', 'generator', + 'header', 'iterators', 'message', 'message_from_file', + 'message_from_string', 'mime', 'parser', + 'quopriMIME', 'quoprimime', 'utils', + ]) + + def test_formatdate(self): + now = time.time() + self.assertEqual(utils.parsedate(utils.formatdate(now))[:6], + time.gmtime(now)[:6]) + + def test_formatdate_localtime(self): + now = time.time() + self.assertEqual( + utils.parsedate(utils.formatdate(now, localtime=True))[:6], + time.localtime(now)[:6]) + + def test_formatdate_usegmt(self): + now = time.time() + self.assertEqual( + utils.formatdate(now, localtime=False), + time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now))) + self.assertEqual( + utils.formatdate(now, localtime=False, usegmt=True), + time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now))) + + def test_parsedate_none(self): + self.assertEqual(utils.parsedate(''), None) + + def test_parsedate_compact(self): + # The FWS after the comma is optional + self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'), + utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')) + + def test_parsedate_no_dayofweek(self): + eq = self.assertEqual + eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'), + (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)) + + def test_parsedate_compact_no_dayofweek(self): + eq = self.assertEqual + eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), + (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) + + def test_parsedate_acceptable_to_time_functions(self): + eq = self.assertEqual + timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800') + t = int(time.mktime(timetup)) + eq(time.localtime(t)[:6], timetup[:6]) + eq(int(time.strftime('%Y', timetup)), 2003) + timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') + t = int(time.mktime(timetup[:9])) + eq(time.localtime(t)[:6], timetup[:6]) + eq(int(time.strftime('%Y', timetup[:9])), 2003) + + def test_parseaddr_empty(self): + self.assertEqual(utils.parseaddr('<>'), ('', '')) + self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '') + + def test_noquote_dump(self): + self.assertEqual( + utils.formataddr(('A Silly Person', 'person@dom.ain')), + 'A Silly Person ') + + def test_escape_dump(self): + self.assertEqual( + utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), + r'"A \(Very\) Silly Person" ') + a = r'A \(Special\) Person' + b = 'person@dom.ain' + self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) + + def test_escape_backslashes(self): + self.assertEqual( + utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')), + r'"Arthur \\Backslash\\ Foobar" ') + a = r'Arthur \Backslash\ Foobar' + b = 'person@dom.ain' + self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) + + def test_name_with_dot(self): + x = 'John X. Doe ' + y = '"John X. Doe" ' + a, b = ('John X. Doe', 'jxd@example.com') + self.assertEqual(utils.parseaddr(x), (a, b)) + self.assertEqual(utils.parseaddr(y), (a, b)) + # formataddr() quotes the name if there's a dot in it + self.assertEqual(utils.formataddr((a, b)), y) + + def test_multiline_from_comment(self): + x = """\ +Foo +\tBar """ + self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) + + def test_quote_dump(self): + self.assertEqual( + utils.formataddr(('A Silly; Person', 'person@dom.ain')), + r'"A Silly; Person" ') + + def test_fix_eols(self): + eq = self.assertEqual + eq(utils.fix_eols('hello'), 'hello') + eq(utils.fix_eols('hello\n'), 'hello\r\n') + eq(utils.fix_eols('hello\r'), 'hello\r\n') + eq(utils.fix_eols('hello\r\n'), 'hello\r\n') + eq(utils.fix_eols('hello\n\r'), 'hello\r\n\r\n') + + def test_charset_richcomparisons(self): + eq = self.assertEqual + ne = self.assertNotEqual + cset1 = Charset() + cset2 = Charset() + eq(cset1, 'us-ascii') + eq(cset1, 'US-ASCII') + eq(cset1, 'Us-AsCiI') + eq('us-ascii', cset1) + eq('US-ASCII', cset1) + eq('Us-AsCiI', cset1) + ne(cset1, 'usascii') + ne(cset1, 'USASCII') + ne(cset1, 'UsAsCiI') + ne('usascii', cset1) + ne('USASCII', cset1) + ne('UsAsCiI', cset1) + eq(cset1, cset2) + eq(cset2, cset1) + + def test_getaddresses(self): + eq = self.assertEqual + eq(utils.getaddresses(['aperson@dom.ain (Al Person)', + 'Bud Person ']), + [('Al Person', 'aperson@dom.ain'), + ('Bud Person', 'bperson@dom.ain')]) + + def test_getaddresses_nasty(self): + eq = self.assertEqual + eq(utils.getaddresses(['foo: ;']), [('', '')]) + eq(utils.getaddresses( + ['[]*-- =~$']), + [('', ''), ('', ''), ('', '*--')]) + eq(utils.getaddresses( + ['foo: ;', '"Jason R. Mastaler" ']), + [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) + + def test_getaddresses_embedded_comment(self): + """Test proper handling of a nested comment""" + eq = self.assertEqual + addrs = utils.getaddresses(['User ((nested comment)) ']) + eq(addrs[0][1], 'foo@bar.com') + + def test_utils_quote_unquote(self): + eq = self.assertEqual + msg = Message() + msg.add_header('content-disposition', 'attachment', + filename='foo\\wacky"name') + eq(msg.get_filename(), 'foo\\wacky"name') + + def test_get_body_encoding_with_bogus_charset(self): + charset = Charset('not a charset') + self.assertEqual(charset.get_body_encoding(), 'base64') + + def test_get_body_encoding_with_uppercase_charset(self): + eq = self.assertEqual + msg = Message() + msg['Content-Type'] = 'text/plain; charset=UTF-8' + eq(msg['content-type'], 'text/plain; charset=UTF-8') + charsets = msg.get_charsets() + eq(len(charsets), 1) + eq(charsets[0], 'utf-8') + charset = Charset(charsets[0]) + eq(charset.get_body_encoding(), 'base64') + msg.set_payload('hello world', charset=charset) + eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') + eq(msg.get_payload(decode=True), 'hello world') + eq(msg['content-transfer-encoding'], 'base64') + # Try another one + msg = Message() + msg['Content-Type'] = 'text/plain; charset="US-ASCII"' + charsets = msg.get_charsets() + eq(len(charsets), 1) + eq(charsets[0], 'us-ascii') + charset = Charset(charsets[0]) + eq(charset.get_body_encoding(), encoders.encode_7or8bit) + msg.set_payload('hello world', charset=charset) + eq(msg.get_payload(), 'hello world') + eq(msg['content-transfer-encoding'], '7bit') + + def test_charsets_case_insensitive(self): + lc = Charset('us-ascii') + uc = Charset('US-ASCII') + self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding()) + + def test_partial_falls_inside_message_delivery_status(self): + eq = self.ndiffAssertEqual + # The Parser interface provides chunks of data to FeedParser in 8192 + # byte gulps. SF bug #1076485 found one of those chunks inside + # message/delivery-status header block, which triggered an + # unreadline() of NeedMoreData. + msg = self._msgobj('msg_43.txt') + sfp = StringIO() + iterators._structure(msg, sfp) + eq(sfp.getvalue(), """\ +multipart/report + text/plain + message/delivery-status + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/plain + text/rfc822-headers +""") + + + +# Test the iterator/generators +class TestIterators(TestEmailBase): + def test_body_line_iterator(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + # First a simple non-multipart message + msg = self._msgobj('msg_01.txt') + it = iterators.body_line_iterator(msg) + lines = list(it) + eq(len(lines), 6) + neq(EMPTYSTRING.join(lines), msg.get_payload()) + # Now a more complicated multipart + msg = self._msgobj('msg_02.txt') + it = iterators.body_line_iterator(msg) + lines = list(it) + eq(len(lines), 43) + fp = openfile('msg_19.txt') + try: + neq(EMPTYSTRING.join(lines), fp.read()) + finally: + fp.close() + + def test_typed_subpart_iterator(self): + eq = self.assertEqual + msg = self._msgobj('msg_04.txt') + it = iterators.typed_subpart_iterator(msg, 'text') + lines = [] + subparts = 0 + for subpart in it: + subparts += 1 + lines.append(subpart.get_payload()) + eq(subparts, 2) + eq(EMPTYSTRING.join(lines), """\ +a simple kind of mirror +to reflect upon our own +a simple kind of mirror +to reflect upon our own +""") + + def test_typed_subpart_iterator_default_type(self): + eq = self.assertEqual + msg = self._msgobj('msg_03.txt') + it = iterators.typed_subpart_iterator(msg, 'text', 'plain') + lines = [] + subparts = 0 + for subpart in it: + subparts += 1 + lines.append(subpart.get_payload()) + eq(subparts, 1) + eq(EMPTYSTRING.join(lines), """\ + +Hi, + +Do you like this message? + +-Me +""") + + + +class TestParsers(TestEmailBase): + def test_header_parser(self): + eq = self.assertEqual + # Parse only the headers of a complex multipart MIME document + fp = openfile('msg_02.txt') + try: + msg = HeaderParser().parse(fp) + finally: + fp.close() + eq(msg['from'], 'ppp-request@zzz.org') + eq(msg['to'], 'ppp@zzz.org') + eq(msg.get_content_type(), 'multipart/mixed') + self.assertFalse(msg.is_multipart()) + self.assertIsInstance(msg.get_payload(), str) + + def test_whitespace_continuation(self): + eq = self.assertEqual + # This message contains a line after the Subject: header that has only + # whitespace, but it is not empty! + msg = email.message_from_string("""\ +From: aperson@dom.ain +To: bperson@dom.ain +Subject: the next line has a space on it +\x20 +Date: Mon, 8 Apr 2002 15:09:19 -0400 +Message-ID: spam + +Here's the message body +""") + eq(msg['subject'], 'the next line has a space on it\n ') + eq(msg['message-id'], 'spam') + eq(msg.get_payload(), "Here's the message body\n") + + def test_whitespace_continuation_last_header(self): + eq = self.assertEqual + # Like the previous test, but the subject line is the last + # header. + msg = email.message_from_string("""\ +From: aperson@dom.ain +To: bperson@dom.ain +Date: Mon, 8 Apr 2002 15:09:19 -0400 +Message-ID: spam +Subject: the next line has a space on it +\x20 + +Here's the message body +""") + eq(msg['subject'], 'the next line has a space on it\n ') + eq(msg['message-id'], 'spam') + eq(msg.get_payload(), "Here's the message body\n") + + def test_crlf_separation(self): + eq = self.assertEqual + fp = openfile('msg_26.txt', mode='rb') + try: + msg = Parser().parse(fp) + finally: + fp.close() + eq(len(msg.get_payload()), 2) + part1 = msg.get_payload(0) + eq(part1.get_content_type(), 'text/plain') + eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n') + part2 = msg.get_payload(1) + eq(part2.get_content_type(), 'application/riscos') + + def test_multipart_digest_with_extra_mime_headers(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + fp = openfile('msg_28.txt') + try: + msg = email.message_from_file(fp) + finally: + fp.close() + # Structure is: + # multipart/digest + # message/rfc822 + # text/plain + # message/rfc822 + # text/plain + eq(msg.is_multipart(), 1) + eq(len(msg.get_payload()), 2) + part1 = msg.get_payload(0) + eq(part1.get_content_type(), 'message/rfc822') + eq(part1.is_multipart(), 1) + eq(len(part1.get_payload()), 1) + part1a = part1.get_payload(0) + eq(part1a.is_multipart(), 0) + eq(part1a.get_content_type(), 'text/plain') + neq(part1a.get_payload(), 'message 1\n') + # next message/rfc822 + part2 = msg.get_payload(1) + eq(part2.get_content_type(), 'message/rfc822') + eq(part2.is_multipart(), 1) + eq(len(part2.get_payload()), 1) + part2a = part2.get_payload(0) + eq(part2a.is_multipart(), 0) + eq(part2a.get_content_type(), 'text/plain') + neq(part2a.get_payload(), 'message 2\n') + + def test_three_lines(self): + # A bug report by Andrew McNamara + lines = ['From: Andrew Person From', 'From']) + eq(msg.get_payload(), 'body') + + def test_rfc2822_space_not_allowed_in_header(self): + eq = self.assertEqual + m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody' + msg = email.message_from_string(m) + eq(len(msg.keys()), 0) + + def test_rfc2822_one_character_header(self): + eq = self.assertEqual + m = 'A: first header\nB: second header\nCC: third header\n\nbody' + msg = email.message_from_string(m) + headers = msg.keys() + headers.sort() + eq(headers, ['A', 'B', 'CC']) + eq(msg.get_payload(), 'body') + + + +class TestBase64(unittest.TestCase): + def test_len(self): + eq = self.assertEqual + eq(base64mime.base64_len('hello'), + len(base64mime.encode('hello', eol=''))) + for size in range(15): + if size == 0 : bsize = 0 + elif size <= 3 : bsize = 4 + elif size <= 6 : bsize = 8 + elif size <= 9 : bsize = 12 + elif size <= 12: bsize = 16 + else : bsize = 20 + eq(base64mime.base64_len('x'*size), bsize) + + def test_decode(self): + eq = self.assertEqual + eq(base64mime.decode(''), '') + eq(base64mime.decode('aGVsbG8='), 'hello') + eq(base64mime.decode('aGVsbG8=', 'X'), 'hello') + eq(base64mime.decode('aGVsbG8NCndvcmxk\n', 'X'), 'helloXworld') + + def test_encode(self): + eq = self.assertEqual + eq(base64mime.encode(''), '') + eq(base64mime.encode('hello'), 'aGVsbG8=\n') + # Test the binary flag + eq(base64mime.encode('hello\n'), 'aGVsbG8K\n') + eq(base64mime.encode('hello\n', 0), 'aGVsbG8NCg==\n') + # Test the maxlinelen arg + eq(base64mime.encode('xxxx ' * 20, maxlinelen=40), """\ +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg +eHh4eCB4eHh4IA== +""") + # Test the eol argument + eq(base64mime.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r +eHh4eCB4eHh4IA==\r +""") + + def test_header_encode(self): + eq = self.assertEqual + he = base64mime.header_encode + eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=') + eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=') + # Test the charset option + eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=') + # Test the keep_eols flag + eq(he('hello\nworld', keep_eols=True), + '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') + # Test the maxlinelen argument + eq(he('xxxx ' * 20, maxlinelen=40), """\ +=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?= + =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?= + =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?= + =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?= + =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?= + =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""") + # Test the eol argument + eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=\r + =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=\r + =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=\r + =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=\r + =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=\r + =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""") + + + +class TestQuopri(unittest.TestCase): + def setUp(self): + self.hlit = [chr(x) for x in range(ord('a'), ord('z')+1)] + \ + [chr(x) for x in range(ord('A'), ord('Z')+1)] + \ + [chr(x) for x in range(ord('0'), ord('9')+1)] + \ + ['!', '*', '+', '-', '/', ' '] + self.hnon = [chr(x) for x in range(256) if chr(x) not in self.hlit] + assert len(self.hlit) + len(self.hnon) == 256 + self.blit = [chr(x) for x in range(ord(' '), ord('~')+1)] + ['\t'] + self.blit.remove('=') + self.bnon = [chr(x) for x in range(256) if chr(x) not in self.blit] + assert len(self.blit) + len(self.bnon) == 256 + + def test_header_quopri_check(self): + for c in self.hlit: + self.assertFalse(quoprimime.header_quopri_check(c)) + for c in self.hnon: + self.assertTrue(quoprimime.header_quopri_check(c)) + + def test_body_quopri_check(self): + for c in self.blit: + self.assertFalse(quoprimime.body_quopri_check(c)) + for c in self.bnon: + self.assertTrue(quoprimime.body_quopri_check(c)) + + def test_header_quopri_len(self): + eq = self.assertEqual + hql = quoprimime.header_quopri_len + enc = quoprimime.header_encode + for s in ('hello', 'h@e@l@l@o@'): + # Empty charset and no line-endings. 7 == RFC chrome + eq(hql(s), len(enc(s, charset='', eol=''))-7) + for c in self.hlit: + eq(hql(c), 1) + for c in self.hnon: + eq(hql(c), 3) + + def test_body_quopri_len(self): + eq = self.assertEqual + bql = quoprimime.body_quopri_len + for c in self.blit: + eq(bql(c), 1) + for c in self.bnon: + eq(bql(c), 3) + + def test_quote_unquote_idempotent(self): + for x in range(256): + c = chr(x) + self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c) + + def test_header_encode(self): + eq = self.assertEqual + he = quoprimime.header_encode + eq(he('hello'), '=?iso-8859-1?q?hello?=') + eq(he('hello\nworld'), '=?iso-8859-1?q?hello=0D=0Aworld?=') + # Test the charset option + eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=') + # Test the keep_eols flag + eq(he('hello\nworld', keep_eols=True), '=?iso-8859-1?q?hello=0Aworld?=') + # Test a non-ASCII character + eq(he('hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=') + # Test the maxlinelen argument + eq(he('xxxx ' * 20, maxlinelen=40), """\ +=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?= + =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?= + =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?= + =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?= + =?iso-8859-1?q?x_xxxx_xxxx_?=""") + # Test the eol argument + eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=\r + =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=\r + =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=\r + =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=\r + =?iso-8859-1?q?x_xxxx_xxxx_?=""") + + def test_decode(self): + eq = self.assertEqual + eq(quoprimime.decode(''), '') + eq(quoprimime.decode('hello'), 'hello') + eq(quoprimime.decode('hello', 'X'), 'hello') + eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld') + + def test_encode(self): + eq = self.assertEqual + eq(quoprimime.encode(''), '') + eq(quoprimime.encode('hello'), 'hello') + # Test the binary flag + eq(quoprimime.encode('hello\r\nworld'), 'hello\nworld') + eq(quoprimime.encode('hello\r\nworld', 0), 'hello\nworld') + # Test the maxlinelen arg + eq(quoprimime.encode('xxxx ' * 20, maxlinelen=40), """\ +xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx= + xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx= +x xxxx xxxx xxxx xxxx=20""") + # Test the eol argument + eq(quoprimime.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r + xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r +x xxxx xxxx xxxx xxxx=20""") + eq(quoprimime.encode("""\ +one line + +two line"""), """\ +one line + +two line""") + + + +# Test the Charset class +class TestCharset(unittest.TestCase): + def tearDown(self): + from email import charset as CharsetModule + try: + del CharsetModule.CHARSETS['fake'] + except KeyError: + pass + + def test_idempotent(self): + eq = self.assertEqual + # Make sure us-ascii = no Unicode conversion + c = Charset('us-ascii') + s = 'Hello World!' + sp = c.to_splittable(s) + eq(s, c.from_splittable(sp)) + # test 8-bit idempotency with us-ascii + s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa' + sp = c.to_splittable(s) + eq(s, c.from_splittable(sp)) + + def test_body_encode(self): + eq = self.assertEqual + # Try a charset with QP body encoding + c = Charset('iso-8859-1') + eq('hello w=F6rld', c.body_encode('hello w\xf6rld')) + # Try a charset with Base64 body encoding + c = Charset('utf-8') + eq('aGVsbG8gd29ybGQ=\n', c.body_encode('hello world')) + # Try a charset with None body encoding + c = Charset('us-ascii') + eq('hello world', c.body_encode('hello world')) + # Try the convert argument, where input codec != output codec + c = Charset('euc-jp') + # With apologies to Tokio Kikuchi ;) + try: + eq('\x1b$B5FCO;~IW\x1b(B', + c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7')) + eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', + c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False)) + except LookupError: + # We probably don't have the Japanese codecs installed + pass + # Testing SF bug #625509, which we have to fake, since there are no + # built-in encodings where the header encoding is QP but the body + # encoding is not. + from email import charset as CharsetModule + CharsetModule.add_charset('fake', CharsetModule.QP, None) + c = Charset('fake') + eq('hello w\xf6rld', c.body_encode('hello w\xf6rld')) + + def test_unicode_charset_name(self): + charset = Charset(u'us-ascii') + self.assertEqual(str(charset), 'us-ascii') + self.assertRaises(errors.CharsetError, Charset, 'asc\xffii') + + + +# Test multilingual MIME headers. +class TestHeader(TestEmailBase): + def test_simple(self): + eq = self.ndiffAssertEqual + h = Header('Hello World!') + eq(h.encode(), 'Hello World!') + h.append(' Goodbye World!') + eq(h.encode(), 'Hello World! Goodbye World!') + + def test_simple_surprise(self): + eq = self.ndiffAssertEqual + h = Header('Hello World!') + eq(h.encode(), 'Hello World!') + h.append('Goodbye World!') + eq(h.encode(), 'Hello World! Goodbye World!') + + def test_header_needs_no_decoding(self): + h = 'no decoding needed' + self.assertEqual(decode_header(h), [(h, None)]) + + def test_long(self): + h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.", + maxlinelen=76) + for l in h.encode(splitchars=' ').split('\n '): + self.assertLessEqual(len(l), 76) + + def test_multilingual(self): + eq = self.ndiffAssertEqual + g = Charset("iso-8859-1") + cz = Charset("iso-8859-2") + utf8 = Charset("utf-8") + g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. " + cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. " + utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8") + h = Header(g_head, g) + h.append(cz_head, cz) + h.append(utf8_head, utf8) + enc = h.encode() + eq(enc, """\ +=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_ko?= + =?iso-8859-1?q?mfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wan?= + =?iso-8859-1?q?dgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6?= + =?iso-8859-1?q?rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?= + =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= + =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?= + =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?= + =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?= + =?utf-8?q?_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das_Oder_die_Fl?= + =?utf-8?b?aXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBo+OBpuOBhOOBvuOBmQ==?= + =?utf-8?b?44CC?=""") + eq(decode_header(enc), + [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"), + (utf8_head, "utf-8")]) + ustr = unicode(h) + eq(ustr.encode('utf-8'), + 'Die Mieter treten hier ein werden mit einem Foerderband ' + 'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen ' + 'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen ' + 'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod ' + 'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81' + '\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3' + '\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3' + '\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' + '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e' + '\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3' + '\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82' + '\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b' + '\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git ' + 'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt ' + 'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81' + '\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82') + # Test make_header() + newh = make_header(decode_header(enc)) + eq(newh, enc) + + def test_header_ctor_default_args(self): + eq = self.ndiffAssertEqual + h = Header() + eq(h, '') + h.append('foo', Charset('iso-8859-1')) + eq(h, '=?iso-8859-1?q?foo?=') + + def test_explicit_maxlinelen(self): + eq = self.ndiffAssertEqual + hstr = 'A very long line that must get split to something other than at the 76th character boundary to test the non-default behavior' + h = Header(hstr) + eq(h.encode(), '''\ +A very long line that must get split to something other than at the 76th + character boundary to test the non-default behavior''') + h = Header(hstr, header_name='Subject') + eq(h.encode(), '''\ +A very long line that must get split to something other than at the + 76th character boundary to test the non-default behavior''') + h = Header(hstr, maxlinelen=1024, header_name='Subject') + eq(h.encode(), hstr) + + def test_us_ascii_header(self): + eq = self.assertEqual + s = 'hello' + x = decode_header(s) + eq(x, [('hello', None)]) + h = make_header(x) + eq(s, h.encode()) + + def test_string_charset(self): + eq = self.assertEqual + h = Header() + h.append('hello', 'iso-8859-1') + eq(h, '=?iso-8859-1?q?hello?=') + +## def test_unicode_error(self): +## raises = self.assertRaises +## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii') +## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii') +## h = Header() +## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii') +## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii') +## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1') + + def test_utf8_shortest(self): + eq = self.assertEqual + h = Header(u'p\xf6stal', 'utf-8') + eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=') + h = Header(u'\u83ca\u5730\u6642\u592b', 'utf-8') + eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=') + + def test_bad_8bit_header(self): + raises = self.assertRaises + eq = self.assertEqual + x = 'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' + raises(UnicodeError, Header, x) + h = Header() + raises(UnicodeError, h.append, x) + eq(str(Header(x, errors='replace')), x) + h.append(x, errors='replace') + eq(str(h), x) + + def test_encoded_adjacent_nonencoded(self): + eq = self.assertEqual + h = Header() + h.append('hello', 'iso-8859-1') + h.append('world') + s = h.encode() + eq(s, '=?iso-8859-1?q?hello?= world') + h = make_header(decode_header(s)) + eq(h.encode(), s) + + def test_whitespace_eater(self): + eq = self.assertEqual + s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' + parts = decode_header(s) + eq(parts, [('Subject:', None), ('\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), ('zz.', None)]) + hdr = make_header(parts) + eq(hdr.encode(), + 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') + + def test_broken_base64_header(self): + raises = self.assertRaises + s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?=' + raises(errors.HeaderParseError, decode_header, s) + + + +# Test RFC 2231 header parameters (en/de)coding +class TestRFC2231(TestEmailBase): + def test_get_param(self): + eq = self.assertEqual + msg = self._msgobj('msg_29.txt') + eq(msg.get_param('title'), + ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) + eq(msg.get_param('title', unquote=False), + ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"')) + + def test_set_param(self): + eq = self.assertEqual + msg = Message() + msg.set_param('title', 'This is even more ***fun*** isn\'t it!', + charset='us-ascii') + eq(msg.get_param('title'), + ('us-ascii', '', 'This is even more ***fun*** isn\'t it!')) + msg.set_param('title', 'This is even more ***fun*** isn\'t it!', + charset='us-ascii', language='en') + eq(msg.get_param('title'), + ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) + msg = self._msgobj('msg_01.txt') + msg.set_param('title', 'This is even more ***fun*** isn\'t it!', + charset='us-ascii', language='en') + self.ndiffAssertEqual(msg.as_string(), """\ +Return-Path: +Delivered-To: bbb@zzz.org +Received: by mail.zzz.org (Postfix, from userid 889) + id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Message-ID: <15090.61304.110929.45684@aaa.zzz.org> +From: bbb@ddd.com (John X. Doe) +To: bbb@zzz.org +Subject: This is a test message +Date: Fri, 4 May 2001 14:05:44 -0400 +Content-Type: text/plain; charset=us-ascii; + title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21" + + +Hi, + +Do you like this message? + +-Me +""") + + def test_del_param(self): + eq = self.ndiffAssertEqual + msg = self._msgobj('msg_01.txt') + msg.set_param('foo', 'bar', charset='us-ascii', language='en') + msg.set_param('title', 'This is even more ***fun*** isn\'t it!', + charset='us-ascii', language='en') + msg.del_param('foo', header='Content-Type') + eq(msg.as_string(), """\ +Return-Path: +Delivered-To: bbb@zzz.org +Received: by mail.zzz.org (Postfix, from userid 889) + id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Message-ID: <15090.61304.110929.45684@aaa.zzz.org> +From: bbb@ddd.com (John X. Doe) +To: bbb@zzz.org +Subject: This is a test message +Date: Fri, 4 May 2001 14:05:44 -0400 +Content-Type: text/plain; charset="us-ascii"; + title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21" + + +Hi, + +Do you like this message? + +-Me +""") + + def test_rfc2231_get_content_charset(self): + eq = self.assertEqual + msg = self._msgobj('msg_32.txt') + eq(msg.get_content_charset(), 'us-ascii') + + def test_rfc2231_no_language_or_charset(self): + m = '''\ +Content-Transfer-Encoding: 8bit +Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm" +Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm + +''' + msg = email.message_from_string(m) + param = msg.get_param('NAME') + self.assertFalse(isinstance(param, tuple)) + self.assertEqual( + param, + 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm') + + def test_rfc2231_no_language_or_charset_in_filename(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="''This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), + 'This is even more ***fun*** is it not.pdf') + + def test_rfc2231_no_language_or_charset_in_filename_encoded(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="''This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), + 'This is even more ***fun*** is it not.pdf') + + def test_rfc2231_partly_encoded(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0="''This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual( + msg.get_filename(), + 'This%20is%20even%20more%20***fun*** is it not.pdf') + + def test_rfc2231_partly_nonencoded(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0="This%20is%20even%20more%20"; +\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual( + msg.get_filename(), + 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf') + + def test_rfc2231_no_language_or_charset_in_boundary(self): + m = '''\ +Content-Type: multipart/alternative; +\tboundary*0*="''This%20is%20even%20more%20"; +\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tboundary*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_boundary(), + 'This is even more ***fun*** is it not.pdf') + + def test_rfc2231_no_language_or_charset_in_charset(self): + # This is a nonsensical charset value, but tests the code anyway + m = '''\ +Content-Type: text/plain; +\tcharset*0*="This%20is%20even%20more%20"; +\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tcharset*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_content_charset(), + 'this is even more ***fun*** is it not.pdf') + + def test_rfc2231_bad_encoding_in_filename(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="bogus'xx'This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), + 'This is even more ***fun*** is it not.pdf') + + def test_rfc2231_bad_encoding_in_charset(self): + m = """\ +Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D + +""" + msg = email.message_from_string(m) + # This should return None because non-ascii characters in the charset + # are not allowed. + self.assertEqual(msg.get_content_charset(), None) + + def test_rfc2231_bad_character_in_charset(self): + m = """\ +Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D + +""" + msg = email.message_from_string(m) + # This should return None because non-ascii characters in the charset + # are not allowed. + self.assertEqual(msg.get_content_charset(), None) + + def test_rfc2231_bad_character_in_filename(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="ascii'xx'This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2*="is it not.pdf%E2" + +''' + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), + u'This is even more ***fun*** is it not.pdf\ufffd') + + def test_rfc2231_unknown_encoding(self): + m = """\ +Content-Transfer-Encoding: 8bit +Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt + +""" + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), 'myfile.txt') + + def test_rfc2231_single_tick_in_filename_extended(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; +\tname*0*=\"Frank's\"; name*1*=\" Document\" + +""" + msg = email.message_from_string(m) + charset, language, s = msg.get_param('name') + eq(charset, None) + eq(language, None) + eq(s, "Frank's Document") + + def test_rfc2231_single_tick_in_filename(self): + m = """\ +Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" + +""" + msg = email.message_from_string(m) + param = msg.get_param('name') + self.assertFalse(isinstance(param, tuple)) + self.assertEqual(param, "Frank's Document") + + def test_rfc2231_tick_attack_extended(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; +\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\" + +""" + msg = email.message_from_string(m) + charset, language, s = msg.get_param('name') + eq(charset, 'us-ascii') + eq(language, 'en-us') + eq(s, "Frank's Document") + + def test_rfc2231_tick_attack(self): + m = """\ +Content-Type: application/x-foo; +\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" + +""" + msg = email.message_from_string(m) + param = msg.get_param('name') + self.assertFalse(isinstance(param, tuple)) + self.assertEqual(param, "us-ascii'en-us'Frank's Document") + + def test_rfc2231_no_extended_values(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; name=\"Frank's Document\" + +""" + msg = email.message_from_string(m) + eq(msg.get_param('name'), "Frank's Document") + + def test_rfc2231_encoded_then_unencoded_segments(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; +\tname*0*=\"us-ascii'en-us'My\"; +\tname*1=\" Document\"; +\tname*2*=\" For You\" + +""" + msg = email.message_from_string(m) + charset, language, s = msg.get_param('name') + eq(charset, 'us-ascii') + eq(language, 'en-us') + eq(s, 'My Document For You') + + def test_rfc2231_unencoded_then_encoded_segments(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; +\tname*0=\"us-ascii'en-us'My\"; +\tname*1*=\" Document\"; +\tname*2*=\" For You\" + +""" + msg = email.message_from_string(m) + charset, language, s = msg.get_param('name') + eq(charset, 'us-ascii') + eq(language, 'en-us') + eq(s, 'My Document For You') + + + +def _testclasses(): + mod = sys.modules[__name__] + return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')] + + +def suite(): + suite = unittest.TestSuite() + for testclass in _testclasses(): + suite.addTest(unittest.makeSuite(testclass)) + return suite + + +def test_main(): + for testclass in _testclasses(): + run_unittest(testclass) + + + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/third_party/stdlib/email/test/test_email_torture.py b/third_party/stdlib/email/test/test_email_torture.py new file mode 100644 index 00000000..9dfa6028 --- /dev/null +++ b/third_party/stdlib/email/test/test_email_torture.py @@ -0,0 +1,136 @@ +# Copyright (C) 2002-2004 Python Software Foundation +# +# A torture test of the email package. This should not be run as part of the +# standard Python test suite since it requires several meg of email messages +# collected in the wild. These source messages are not checked into the +# Python distro, but are available as part of the standalone email package at +# http://sf.net/projects/mimelib + +import sys +import os +import unittest +from cStringIO import StringIO +from types import ListType + +from email.test.test_email import TestEmailBase +from test.test_support import TestSkipped, run_unittest + +import email +from email import __file__ as testfile +from email.iterators import _structure + +def openfile(filename): + from os.path import join, dirname, abspath + path = abspath(join(dirname(testfile), os.pardir, 'moredata', filename)) + return open(path, 'r') + +# Prevent this test from running in the Python distro +try: + openfile('crispin-torture.txt') +except IOError: + raise TestSkipped + + + +class TortureBase(TestEmailBase): + def _msgobj(self, filename): + fp = openfile(filename) + try: + msg = email.message_from_file(fp) + finally: + fp.close() + return msg + + + +class TestCrispinTorture(TortureBase): + # Mark Crispin's torture test from the SquirrelMail project + def test_mondo_message(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + msg = self._msgobj('crispin-torture.txt') + payload = msg.get_payload() + eq(type(payload), ListType) + eq(len(payload), 12) + eq(msg.preamble, None) + eq(msg.epilogue, '\n') + # Probably the best way to verify the message is parsed correctly is to + # dump its structure and compare it against the known structure. + fp = StringIO() + _structure(msg, fp=fp) + neq(fp.getvalue(), """\ +multipart/mixed + text/plain + message/rfc822 + multipart/alternative + text/plain + multipart/mixed + text/richtext + application/andrew-inset + message/rfc822 + audio/basic + audio/basic + image/pbm + message/rfc822 + multipart/mixed + multipart/mixed + text/plain + audio/x-sun + multipart/mixed + image/gif + image/gif + application/x-be2 + application/atomicmail + audio/x-sun + message/rfc822 + multipart/mixed + text/plain + image/pgm + text/plain + message/rfc822 + multipart/mixed + text/plain + image/pbm + message/rfc822 + application/postscript + image/gif + message/rfc822 + multipart/mixed + audio/basic + audio/basic + message/rfc822 + multipart/mixed + application/postscript + text/plain + message/rfc822 + multipart/mixed + text/plain + multipart/parallel + image/gif + audio/basic + application/atomicmail + message/rfc822 + audio/x-sun +""") + + +def _testclasses(): + mod = sys.modules[__name__] + return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')] + + +def suite(): + suite = unittest.TestSuite() + for testclass in _testclasses(): + suite.addTest(unittest.makeSuite(testclass)) + return suite + + +def test_main(): + for testclass in _testclasses(): + run_unittest(testclass) + + + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/third_party/stdlib/email/utils.py b/third_party/stdlib/email/utils.py new file mode 100644 index 00000000..ac13f49d --- /dev/null +++ b/third_party/stdlib/email/utils.py @@ -0,0 +1,323 @@ +# Copyright (C) 2001-2010 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Miscellaneous utilities.""" + +__all__ = [ + 'collapse_rfc2231_value', + 'decode_params', + 'decode_rfc2231', + 'encode_rfc2231', + 'formataddr', + 'formatdate', + 'getaddresses', + 'make_msgid', + 'mktime_tz', + 'parseaddr', + 'parsedate', + 'parsedate_tz', + 'unquote', + ] + +import os +import re +import time +import base64 +import random +import socket +import urllib +import warnings + +from email._parseaddr import quote +from email._parseaddr import AddressList as _AddressList +from email._parseaddr import mktime_tz + +# We need wormarounds for bugs in these methods in older Pythons (see below) +from email._parseaddr import parsedate as _parsedate +from email._parseaddr import parsedate_tz as _parsedate_tz + +from quopri import decodestring as _qdecode + +# Intrapackage imports +from email.encoders import _bencode, _qencode + +COMMASPACE = ', ' +EMPTYSTRING = '' +UEMPTYSTRING = u'' +CRLF = '\r\n' +TICK = "'" + +specialsre = re.compile(r'[][\\()<>@,:;".]') +escapesre = re.compile(r'[][\\()"]') + + + +# Helpers + +def _identity(s): + return s + + +def _bdecode(s): + """Decodes a base64 string. + + This function is equivalent to base64.decodestring and it's retained only + for backward compatibility. It used to remove the last \\n of the decoded + string, if it had any (see issue 7143). + """ + if not s: + return s + return base64.decodestring(s) + + + +def fix_eols(s): + """Replace all line-ending characters with \\r\\n.""" + # Fix newlines with no preceding carriage return + s = re.sub(r'(?', name) + return '%s%s%s <%s>' % (quotes, name, quotes, address) + return address + + + +def getaddresses(fieldvalues): + """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" + all = COMMASPACE.join(fieldvalues) + a = _AddressList(all) + return a.addresslist + + + +ecre = re.compile(r''' + =\? # literal =? + (?P[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P.*?) # non-greedy up to the next ?= is the atom + \?= # literal ?= + ''', re.VERBOSE | re.IGNORECASE) + + + +def formatdate(timeval=None, localtime=False, usegmt=False): + """Returns a date string as specified by RFC 2822, e.g.: + + Fri, 09 Nov 2001 01:08:47 -0000 + + Optional timeval if given is a floating point time value as accepted by + gmtime() and localtime(), otherwise the current time is used. + + Optional localtime is a flag that when True, interprets timeval, and + returns a date relative to the local timezone instead of UTC, properly + taking daylight savings time into account. + + Optional argument usegmt means that the timezone is written out as + an ascii string, not numeric one (so "GMT" instead of "+0000"). This + is needed for HTTP, and is only used when localtime==False. + """ + # Note: we cannot use strftime() because that honors the locale and RFC + # 2822 requires that day and month names be the English abbreviations. + if timeval is None: + timeval = time.time() + if localtime: + now = time.localtime(timeval) + # Calculate timezone offset, based on whether the local zone has + # daylight savings time, and whether DST is in effect. + if time.daylight and now[-1]: + offset = time.altzone + else: + offset = time.timezone + hours, minutes = divmod(abs(offset), 3600) + # Remember offset is in seconds west of UTC, but the timezone is in + # minutes east of UTC, so the signs differ. + if offset > 0: + sign = '-' + else: + sign = '+' + zone = '%s%02d%02d' % (sign, hours, minutes // 60) + else: + now = time.gmtime(timeval) + # Timezone offset is always -0000 + if usegmt: + zone = 'GMT' + else: + zone = '-0000' + return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( + ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]], + now[2], + ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1], + now[0], now[3], now[4], now[5], + zone) + + + +def make_msgid(idstring=None): + """Returns a string suitable for RFC 2822 compliant Message-ID, e.g: + + <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com> + + Optional idstring if given is a string used to strengthen the + uniqueness of the message id. + """ + timeval = int(time.time()*100) + pid = os.getpid() + randint = random.getrandbits(64) + if idstring is None: + idstring = '' + else: + idstring = '.' + idstring + idhost = socket.getfqdn() + msgid = '<%d.%d.%d%s@%s>' % (timeval, pid, randint, idstring, idhost) + return msgid + + + +# These functions are in the standalone mimelib version only because they've +# subsequently been fixed in the latest Python versions. We use this to worm +# around broken older Pythons. +def parsedate(data): + if not data: + return None + return _parsedate(data) + + +def parsedate_tz(data): + if not data: + return None + return _parsedate_tz(data) + + +def parseaddr(addr): + addrs = _AddressList(addr).addresslist + if not addrs: + return '', '' + return addrs[0] + + +# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. +def unquote(str): + """Remove quotes from a string.""" + if len(str) > 1: + if str.startswith('"') and str.endswith('"'): + return str[1:-1].replace('\\\\', '\\').replace('\\"', '"') + if str.startswith('<') and str.endswith('>'): + return str[1:-1] + return str + + + +# RFC2231-related functions - parameter encoding and decoding +def decode_rfc2231(s): + """Decode string according to RFC 2231""" + parts = s.split(TICK, 2) + if len(parts) <= 2: + return None, None, s + return parts + + +def encode_rfc2231(s, charset=None, language=None): + """Encode string according to RFC 2231. + + If neither charset nor language is given, then s is returned as-is. If + charset is given but not language, the string is encoded using the empty + string for language. + """ + import urllib + s = urllib.quote(s, safe='') + if charset is None and language is None: + return s + if language is None: + language = '' + return "%s'%s'%s" % (charset, language, s) + + +rfc2231_continuation = re.compile(r'^(?P\w+)\*((?P[0-9]+)\*?)?$') + +def decode_params(params): + """Decode parameters list according to RFC 2231. + + params is a sequence of 2-tuples containing (param name, string value). + """ + # Copy params so we don't mess with the original + params = params[:] + new_params = [] + # Map parameter's name to a list of continuations. The values are a + # 3-tuple of the continuation number, the string value, and a flag + # specifying whether a particular segment is %-encoded. + rfc2231_params = {} + name, value = params.pop(0) + new_params.append((name, value)) + while params: + name, value = params.pop(0) + if name.endswith('*'): + encoded = True + else: + encoded = False + value = unquote(value) + mo = rfc2231_continuation.match(name) + if mo: + name, num = mo.group('name', 'num') + if num is not None: + num = int(num) + rfc2231_params.setdefault(name, []).append((num, value, encoded)) + else: + new_params.append((name, '"%s"' % quote(value))) + if rfc2231_params: + for name, continuations in rfc2231_params.items(): + value = [] + extended = False + # Sort by number + continuations.sort() + # And now append all values in numerical order, converting + # %-encodings for the encoded segments. If any of the + # continuation names ends in a *, then the entire string, after + # decoding segments and concatenating, must have the charset and + # language specifiers at the beginning of the string. + for num, s, encoded in continuations: + if encoded: + s = urllib.unquote(s) + extended = True + value.append(s) + value = quote(EMPTYSTRING.join(value)) + if extended: + charset, language, value = decode_rfc2231(value) + new_params.append((name, (charset, language, '"%s"' % value))) + else: + new_params.append((name, '"%s"' % value)) + return new_params + +def collapse_rfc2231_value(value, errors='replace', + fallback_charset='us-ascii'): + if isinstance(value, tuple): + rawval = unquote(value[2]) + charset = value[0] or 'us-ascii' + try: + return unicode(rawval, charset, errors) + except LookupError: + # XXX charset is unknown to Python. + return unicode(rawval, fallback_charset, errors) + else: + return unquote(value) diff --git a/third_party/stdlib/filecmp.py b/third_party/stdlib/filecmp.py new file mode 100644 index 00000000..3a793819 --- /dev/null +++ b/third_party/stdlib/filecmp.py @@ -0,0 +1,296 @@ +"""Utilities for comparing files and directories. + +Classes: + dircmp + +Functions: + cmp(f1, f2, shallow=1) -> int + cmpfiles(a, b, common) -> ([], [], []) + +""" + +import os +import stat +from itertools import ifilter, ifilterfalse, imap, izip + +__all__ = ["cmp","dircmp","cmpfiles"] + +_cache = {} +BUFSIZE=8*1024 + +def cmp(f1, f2, shallow=1): + """Compare two files. + + Arguments: + + f1 -- First file name + + f2 -- Second file name + + shallow -- Just check stat signature (do not read the files). + defaults to 1. + + Return value: + + True if the files are the same, False otherwise. + + This function uses a cache for past comparisons and the results, + with a cache invalidation mechanism relying on stale signatures. + + """ + + s1 = _sig(os.stat(f1)) + s2 = _sig(os.stat(f2)) + if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG: + return False + if shallow and s1 == s2: + return True + if s1[1] != s2[1]: + return False + + outcome = _cache.get((f1, f2, s1, s2)) + if outcome is None: + outcome = _do_cmp(f1, f2) + if len(_cache) > 100: # limit the maximum size of the cache + _cache.clear() + _cache[f1, f2, s1, s2] = outcome + return outcome + +def _sig(st): + return (stat.S_IFMT(st.st_mode), + st.st_size, + st.st_mtime) + +def _do_cmp(f1, f2): + bufsize = BUFSIZE + with open(f1, 'rb') as fp1, open(f2, 'rb') as fp2: + while True: + b1 = fp1.read(bufsize) + b2 = fp2.read(bufsize) + if b1 != b2: + return False + if not b1: + return True + +# Directory comparison class. +# +class dircmp: + """A class that manages the comparison of 2 directories. + + dircmp(a,b,ignore=None,hide=None) + A and B are directories. + IGNORE is a list of names to ignore, + defaults to ['RCS', 'CVS', 'tags']. + HIDE is a list of names to hide, + defaults to [os.curdir, os.pardir]. + + High level usage: + x = dircmp(dir1, dir2) + x.report() -> prints a report on the differences between dir1 and dir2 + or + x.report_partial_closure() -> prints report on differences between dir1 + and dir2, and reports on common immediate subdirectories. + x.report_full_closure() -> like report_partial_closure, + but fully recursive. + + Attributes: + left_list, right_list: The files in dir1 and dir2, + filtered by hide and ignore. + common: a list of names in both dir1 and dir2. + left_only, right_only: names only in dir1, dir2. + common_dirs: subdirectories in both dir1 and dir2. + common_files: files in both dir1 and dir2. + common_funny: names in both dir1 and dir2 where the type differs between + dir1 and dir2, or the name is not stat-able. + same_files: list of identical files. + diff_files: list of filenames which differ. + funny_files: list of files which could not be compared. + subdirs: a dictionary of dircmp objects, keyed by names in common_dirs. + """ + + def __init__(self, a, b, ignore=None, hide=None): # Initialize + self.left = a + self.right = b + if hide is None: + self.hide = [os.curdir, os.pardir] # Names never to be shown + else: + self.hide = hide + if ignore is None: + self.ignore = ['RCS', 'CVS', 'tags'] # Names ignored in comparison + else: + self.ignore = ignore + + def phase0(self): # Compare everything except common subdirectories + self.left_list = _filter(os.listdir(self.left), + self.hide+self.ignore) + self.right_list = _filter(os.listdir(self.right), + self.hide+self.ignore) + self.left_list.sort() + self.right_list.sort() + + def phase1(self): # Compute common names + a = dict(izip(imap(os.path.normcase, self.left_list), self.left_list)) + b = dict(izip(imap(os.path.normcase, self.right_list), self.right_list)) + self.common = map(a.__getitem__, ifilter(b.__contains__, a)) + self.left_only = map(a.__getitem__, ifilterfalse(b.__contains__, a)) + self.right_only = map(b.__getitem__, ifilterfalse(a.__contains__, b)) + + def phase2(self): # Distinguish files, directories, funnies + self.common_dirs = [] + self.common_files = [] + self.common_funny = [] + + for x in self.common: + a_path = os.path.join(self.left, x) + b_path = os.path.join(self.right, x) + + ok = 1 + try: + a_stat = os.stat(a_path) + except os.error, why: + # print 'Can\'t stat', a_path, ':', why[1] + ok = 0 + try: + b_stat = os.stat(b_path) + except os.error, why: + # print 'Can\'t stat', b_path, ':', why[1] + ok = 0 + + if ok: + a_type = stat.S_IFMT(a_stat.st_mode) + b_type = stat.S_IFMT(b_stat.st_mode) + if a_type != b_type: + self.common_funny.append(x) + elif stat.S_ISDIR(a_type): + self.common_dirs.append(x) + elif stat.S_ISREG(a_type): + self.common_files.append(x) + else: + self.common_funny.append(x) + else: + self.common_funny.append(x) + + def phase3(self): # Find out differences between common files + xx = cmpfiles(self.left, self.right, self.common_files) + self.same_files, self.diff_files, self.funny_files = xx + + def phase4(self): # Find out differences between common subdirectories + # A new dircmp object is created for each common subdirectory, + # these are stored in a dictionary indexed by filename. + # The hide and ignore properties are inherited from the parent + self.subdirs = {} + for x in self.common_dirs: + a_x = os.path.join(self.left, x) + b_x = os.path.join(self.right, x) + self.subdirs[x] = dircmp(a_x, b_x, self.ignore, self.hide) + + def phase4_closure(self): # Recursively call phase4() on subdirectories + self.phase4() + for sd in self.subdirs.itervalues(): + sd.phase4_closure() + + def report(self): # Print a report on the differences between a and b + # Output format is purposely lousy + print 'diff', self.left, self.right + if self.left_only: + self.left_only.sort() + print 'Only in', self.left, ':', self.left_only + if self.right_only: + self.right_only.sort() + print 'Only in', self.right, ':', self.right_only + if self.same_files: + self.same_files.sort() + print 'Identical files :', self.same_files + if self.diff_files: + self.diff_files.sort() + print 'Differing files :', self.diff_files + if self.funny_files: + self.funny_files.sort() + print 'Trouble with common files :', self.funny_files + if self.common_dirs: + self.common_dirs.sort() + print 'Common subdirectories :', self.common_dirs + if self.common_funny: + self.common_funny.sort() + print 'Common funny cases :', self.common_funny + + def report_partial_closure(self): # Print reports on self and on subdirs + self.report() + for sd in self.subdirs.itervalues(): + print + sd.report() + + def report_full_closure(self): # Report on self and subdirs recursively + self.report() + for sd in self.subdirs.itervalues(): + print + sd.report_full_closure() + + methodmap = dict(subdirs=phase4, + same_files=phase3, diff_files=phase3, funny_files=phase3, + common_dirs = phase2, common_files=phase2, common_funny=phase2, + common=phase1, left_only=phase1, right_only=phase1, + left_list=phase0, right_list=phase0) + + def __getattr__(self, attr): + if attr not in self.methodmap: + raise AttributeError, attr + self.methodmap[attr](self) + return getattr(self, attr) + +def cmpfiles(a, b, common, shallow=1): + """Compare common files in two directories. + + a, b -- directory names + common -- list of file names found in both directories + shallow -- if true, do comparison based solely on stat() information + + Returns a tuple of three lists: + files that compare equal + files that are different + filenames that aren't regular files. + + """ + res = ([], [], []) + for x in common: + ax = os.path.join(a, x) + bx = os.path.join(b, x) + res[_cmp(ax, bx, shallow)].append(x) + return res + + +# Compare two files. +# Return: +# 0 for equal +# 1 for different +# 2 for funny cases (can't stat, etc.) +# +def _cmp(a, b, sh, abs=abs, cmp=cmp): + try: + return not abs(cmp(a, b, sh)) + except (os.error, IOError): + return 2 + + +# Return a copy with items that occur in skip removed. +# +def _filter(flist, skip): + return list(ifilterfalse(skip.__contains__, flist)) + + +# Demonstration and testing. +# +def demo(): + import sys + import getopt + options, args = getopt.getopt(sys.argv[1:], 'r') + if len(args) != 2: + raise getopt.GetoptError('need exactly two args', None) + dd = dircmp(args[0], args[1]) + if ('-r', '') in options: + dd.report_full_closure() + else: + dd.report() + +if __name__ == '__main__': + demo() diff --git a/third_party/stdlib/hashlib.py b/third_party/stdlib/hashlib.py new file mode 100644 index 00000000..bbd06b99 --- /dev/null +++ b/third_party/stdlib/hashlib.py @@ -0,0 +1,221 @@ +# $Id$ +# +# Copyright (C) 2005 Gregory P. Smith (greg@krypto.org) +# Licensed to PSF under a Contributor Agreement. +# + +__doc__ = """hashlib module - A common interface to many hash functions. + +new(name, string='') - returns a new hash object implementing the + given hash function; initializing the hash + using the given string data. + +Named constructor functions are also available, these are much faster +than using new(): + +md5(), sha1(), sha224(), sha256(), sha384(), and sha512() + +More algorithms may be available on your platform but the above are guaranteed +to exist. See the algorithms_guaranteed and algorithms_available attributes +to find out what algorithm names can be passed to new(). + +NOTE: If you want the adler32 or crc32 hash functions they are available in +the zlib module. + +Choose your hash function wisely. Some have known collision weaknesses. +sha384 and sha512 will be slow on 32 bit platforms. + +Hash objects have these methods: + - update(arg): Update the hash object with the string arg. Repeated calls + are equivalent to a single call with the concatenation of all + the arguments. + - digest(): Return the digest of the strings passed to the update() method + so far. This may contain non-ASCII characters, including + NUL bytes. + - hexdigest(): Like digest() except the digest is returned as a string of + double length, containing only hexadecimal digits. + - copy(): Return a copy (clone) of the hash object. This can be used to + efficiently compute the digests of strings that share a common + initial substring. + +For example, to obtain the digest of the string 'Nobody inspects the +spammish repetition': + + >>> import hashlib + >>> m = hashlib.md5() + >>> m.update("Nobody inspects") + >>> m.update(" the spammish repetition") + >>> m.digest() + '\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9' + +More condensed: + + >>> hashlib.sha224("Nobody inspects the spammish repetition").hexdigest() + 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2' + +""" + +# This tuple and __get_builtin_constructor() must be modified if a new +# always available algorithm is added. +__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512') + +algorithms_guaranteed = set(__always_supported) +algorithms_available = set(__always_supported) + +algorithms = __always_supported + +__all__ = __always_supported + ('new', 'algorithms_guaranteed', + 'algorithms_available', 'algorithms', + 'pbkdf2_hmac') + + +def __get_builtin_constructor(name): + try: + if name in ('SHA1', 'sha1'): + import _sha + return _sha.new + elif name in ('MD5', 'md5'): + import _md5 + return _md5.new + elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'): + import _sha256 + bs = name[3:] + if bs == '256': + return _sha256.sha256 + elif bs == '224': + return _sha256.sha224 + elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'): + import _sha512 + bs = name[3:] + if bs == '512': + return _sha512.sha512 + elif bs == '384': + return _sha512.sha384 + except ImportError: + pass # no extension module, this hash is unsupported. + + raise ValueError('unsupported hash type ' + name) + + +def __get_openssl_constructor(name): + try: + f = getattr(_hashlib, 'openssl_' + name) + # Allow the C module to raise ValueError. The function will be + # defined but the hash not actually available thanks to OpenSSL. + f() + # Use the C function directly (very fast) + return f + except (AttributeError, ValueError): + return __get_builtin_constructor(name) + + +def __py_new(name, string=''): + """new(name, string='') - Return a new hashing object using the named algorithm; + optionally initialized with a string. + """ + return __get_builtin_constructor(name)(string) + + +def __hash_new(name, string=''): + """new(name, string='') - Return a new hashing object using the named algorithm; + optionally initialized with a string. + """ + try: + return _hashlib.new(name, string) + except ValueError: + # If the _hashlib module (OpenSSL) doesn't support the named + # hash, try using our builtin implementations. + # This allows for SHA224/256 and SHA384/512 support even though + # the OpenSSL library prior to 0.9.8 doesn't provide them. + return __get_builtin_constructor(name)(string) + + +try: + import _hashlib + new = __hash_new + __get_hash = __get_openssl_constructor + algorithms_available = algorithms_available.union( + _hashlib.openssl_md_meth_names) +except ImportError: + new = __py_new + __get_hash = __get_builtin_constructor + +for __func_name in __always_supported: + # try them all, some may not work due to the OpenSSL + # version not supporting that algorithm. + try: + globals()[__func_name] = __get_hash(__func_name) + except ValueError: + import logging + logging.exception('code for hash %s was not found.', __func_name) + + +try: + # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA + from _hashlib import pbkdf2_hmac +except ImportError: + import binascii + import struct + + _trans_5C = b"".join(chr(x ^ 0x5C) for x in range(256)) + _trans_36 = b"".join(chr(x ^ 0x36) for x in range(256)) + + def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None): + """Password based key derivation function 2 (PKCS #5 v2.0) + + This Python implementations based on the hmac module about as fast + as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much faster + for long passwords. + """ + if not isinstance(hash_name, str): + raise TypeError(hash_name) + + if not isinstance(password, (bytes, bytearray)): + password = bytes(buffer(password)) + if not isinstance(salt, (bytes, bytearray)): + salt = bytes(buffer(salt)) + + # Fast inline HMAC implementation + inner = new(hash_name) + outer = new(hash_name) + blocksize = getattr(inner, 'block_size', 64) + if len(password) > blocksize: + password = new(hash_name, password).digest() + password = password + b'\x00' * (blocksize - len(password)) + inner.update(password.translate(_trans_36)) + outer.update(password.translate(_trans_5C)) + + def prf(msg, inner=inner, outer=outer): + # PBKDF2_HMAC uses the password as key. We can re-use the same + # digest objects and just update copies to skip initialization. + icpy = inner.copy() + ocpy = outer.copy() + icpy.update(msg) + ocpy.update(icpy.digest()) + return ocpy.digest() + + if iterations < 1: + raise ValueError(iterations) + if dklen is None: + dklen = outer.digest_size + if dklen < 1: + raise ValueError(dklen) + + hex_format_string = "%%0%ix" % (new(hash_name).digest_size * 2) + + dkey = b'' + loop = 1 + while len(dkey) < dklen: + prev = prf(salt + struct.pack(b'>I', loop)) + rkey = int(binascii.hexlify(prev), 16) + for i in xrange(iterations - 1): + prev = prf(prev) + rkey ^= int(binascii.hexlify(prev), 16) + loop += 1 + dkey += binascii.unhexlify(hex_format_string % rkey) + + return dkey[:dklen] + +# Cleanup locals() +del __always_supported, __func_name, __get_hash +del __py_new, __hash_new, __get_openssl_constructor diff --git a/third_party/stdlib/io.py b/third_party/stdlib/io.py new file mode 100644 index 00000000..14384930 --- /dev/null +++ b/third_party/stdlib/io.py @@ -0,0 +1,90 @@ +"""The io module provides the Python interfaces to stream handling. The +builtin open function is defined in this module. + +At the top of the I/O hierarchy is the abstract base class IOBase. It +defines the basic interface to a stream. Note, however, that there is no +separation between reading and writing to streams; implementations are +allowed to raise an IOError if they do not support a given operation. + +Extending IOBase is RawIOBase which deals simply with the reading and +writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide +an interface to OS files. + +BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its +subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer +streams that are readable, writable, and both respectively. +BufferedRandom provides a buffered interface to random access +streams. BytesIO is a simple stream of in-memory bytes. + +Another IOBase subclass, TextIOBase, deals with the encoding and decoding +of streams into text. TextIOWrapper, which extends it, is a buffered text +interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO +is a in-memory stream for text. + +Argument names are not part of the specification, and only the arguments +of open() are intended to be used as keyword arguments. + +data: + +DEFAULT_BUFFER_SIZE + + An int containing the default buffer size used by the module's buffered + I/O classes. open() uses the file's blksize (as obtained by os.stat) if + possible. +""" +# New I/O library conforming to PEP 3116. + +__author__ = ("Guido van Rossum , " + "Mike Verdone , " + "Mark Russell , " + "Antoine Pitrou , " + "Amaury Forgeot d'Arc , " + "Benjamin Peterson ") + +__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO", + "BytesIO", "StringIO", "BufferedIOBase", + "BufferedReader", "BufferedWriter", "BufferedRWPair", + "BufferedRandom", "TextIOBase", "TextIOWrapper", + "UnsupportedOperation", "SEEK_SET", "SEEK_CUR", "SEEK_END"] + + +import _io +import abc + +from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation, + open, FileIO, BytesIO, StringIO, BufferedReader, + BufferedWriter, BufferedRWPair, BufferedRandom, + IncrementalNewlineDecoder, TextIOWrapper) + +OpenWrapper = _io.open # for compatibility with _pyio + +# for seek() +SEEK_SET = 0 +SEEK_CUR = 1 +SEEK_END = 2 + +# Declaring ABCs in C is tricky so we do it here. +# Method descriptions and default implementations are inherited from the C +# version however. +class IOBase(_io._IOBase): + __metaclass__ = abc.ABCMeta + __doc__ = _io._IOBase.__doc__ + +class RawIOBase(_io._RawIOBase, IOBase): + __doc__ = _io._RawIOBase.__doc__ + +class BufferedIOBase(_io._BufferedIOBase, IOBase): + __doc__ = _io._BufferedIOBase.__doc__ + +class TextIOBase(_io._TextIOBase, IOBase): + __doc__ = _io._TextIOBase.__doc__ + +RawIOBase.register(FileIO) + +for klass in (BytesIO, BufferedReader, BufferedWriter, BufferedRandom, + BufferedRWPair): + BufferedIOBase.register(klass) + +for klass in (StringIO, TextIOWrapper): + TextIOBase.register(klass) +del klass diff --git a/third_party/stdlib/shlex.py b/third_party/stdlib/shlex.py new file mode 100644 index 00000000..e7c8accd --- /dev/null +++ b/third_party/stdlib/shlex.py @@ -0,0 +1,292 @@ +# -*- coding: iso-8859-1 -*- +"""A lexical analyzer class for simple shell-like syntaxes.""" + +# Module and documentation by Eric S. Raymond, 21 Dec 1998 +# Input stacking and error message cleanup added by ESR, March 2000 +# push_source() and pop_source() made explicit by ESR, January 2001. +# Posix compliance, split(), string arguments, and +# iterator interface by Gustavo Niemeyer, April 2003. + +import os.path +import sys +from collections import deque + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +__all__ = ["shlex", "split"] + +class shlex: + "A lexical analyzer class for simple shell-like syntaxes." + def __init__(self, instream=None, infile=None, posix=False): + if isinstance(instream, basestring): + instream = StringIO(instream) + if instream is not None: + self.instream = instream + self.infile = infile + else: + self.instream = sys.stdin + self.infile = None + self.posix = posix + if posix: + self.eof = None + else: + self.eof = '' + self.commenters = '#' + self.wordchars = ('abcdfeghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_') + if self.posix: + self.wordchars += ('��������������������������������' + '������������������������������') + self.whitespace = ' \t\r\n' + self.whitespace_split = False + self.quotes = '\'"' + self.escape = '\\' + self.escapedquotes = '"' + self.state = ' ' + self.pushback = deque() + self.lineno = 1 + self.debug = 0 + self.token = '' + self.filestack = deque() + self.source = None + if self.debug: + print 'shlex: reading from %s, line %d' \ + % (self.instream, self.lineno) + + def push_token(self, tok): + "Push a token onto the stack popped by the get_token method" + if self.debug >= 1: + print "shlex: pushing token " + repr(tok) + self.pushback.appendleft(tok) + + def push_source(self, newstream, newfile=None): + "Push an input source onto the lexer's input source stack." + if isinstance(newstream, basestring): + newstream = StringIO(newstream) + self.filestack.appendleft((self.infile, self.instream, self.lineno)) + self.infile = newfile + self.instream = newstream + self.lineno = 1 + if self.debug: + if newfile is not None: + print 'shlex: pushing to file %s' % (self.infile,) + else: + print 'shlex: pushing to stream %s' % (self.instream,) + + def pop_source(self): + "Pop the input source stack." + self.instream.close() + (self.infile, self.instream, self.lineno) = self.filestack.popleft() + if self.debug: + print 'shlex: popping to %s, line %d' \ + % (self.instream, self.lineno) + self.state = ' ' + + def get_token(self): + "Get a token from the input stream (or from stack if it's nonempty)" + if self.pushback: + tok = self.pushback.popleft() + if self.debug >= 1: + print "shlex: popping token " + repr(tok) + return tok + # No pushback. Get a token. + raw = self.read_token() + # Handle inclusions + if self.source is not None: + while raw == self.source: + spec = self.sourcehook(self.read_token()) + if spec: + (newfile, newstream) = spec + self.push_source(newstream, newfile) + raw = self.get_token() + # Maybe we got EOF instead? + while raw == self.eof: + if not self.filestack: + return self.eof + else: + self.pop_source() + raw = self.get_token() + # Neither inclusion nor EOF + if self.debug >= 1: + if raw != self.eof: + print "shlex: token=" + repr(raw) + else: + print "shlex: token=EOF" + return raw + + def read_token(self): + quoted = False + escapedstate = ' ' + while True: + nextchar = self.instream.read(1) + if nextchar == '\n': + self.lineno = self.lineno + 1 + if self.debug >= 3: + print "shlex: in state", repr(self.state), \ + "I see character:", repr(nextchar) + if self.state is None: + self.token = '' # past end of file + break + elif self.state == ' ': + if not nextchar: + self.state = None # end of file + break + elif nextchar in self.whitespace: + if self.debug >= 2: + print "shlex: I see whitespace in whitespace state" + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif nextchar in self.commenters: + self.instream.readline() + self.lineno = self.lineno + 1 + elif self.posix and nextchar in self.escape: + escapedstate = 'a' + self.state = nextchar + elif nextchar in self.wordchars: + self.token = nextchar + self.state = 'a' + elif nextchar in self.quotes: + if not self.posix: + self.token = nextchar + self.state = nextchar + elif self.whitespace_split: + self.token = nextchar + self.state = 'a' + else: + self.token = nextchar + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif self.state in self.quotes: + quoted = True + if not nextchar: # end of file + if self.debug >= 2: + print "shlex: I see EOF in quotes state" + # XXX what error should be raised here? + raise ValueError, "No closing quotation" + if nextchar == self.state: + if not self.posix: + self.token = self.token + nextchar + self.state = ' ' + break + else: + self.state = 'a' + elif self.posix and nextchar in self.escape and \ + self.state in self.escapedquotes: + escapedstate = self.state + self.state = nextchar + else: + self.token = self.token + nextchar + elif self.state in self.escape: + if not nextchar: # end of file + if self.debug >= 2: + print "shlex: I see EOF in escape state" + # XXX what error should be raised here? + raise ValueError, "No escaped character" + # In posix shells, only the quote itself or the escape + # character may be escaped within quotes. + if escapedstate in self.quotes and \ + nextchar != self.state and nextchar != escapedstate: + self.token = self.token + self.state + self.token = self.token + nextchar + self.state = escapedstate + elif self.state == 'a': + if not nextchar: + self.state = None # end of file + break + elif nextchar in self.whitespace: + if self.debug >= 2: + print "shlex: I see whitespace in word state" + self.state = ' ' + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif nextchar in self.commenters: + self.instream.readline() + self.lineno = self.lineno + 1 + if self.posix: + self.state = ' ' + if self.token or (self.posix and quoted): + break # emit current token + else: + continue + elif self.posix and nextchar in self.quotes: + self.state = nextchar + elif self.posix and nextchar in self.escape: + escapedstate = 'a' + self.state = nextchar + elif nextchar in self.wordchars or nextchar in self.quotes \ + or self.whitespace_split: + self.token = self.token + nextchar + else: + self.pushback.appendleft(nextchar) + if self.debug >= 2: + print "shlex: I see punctuation in word state" + self.state = ' ' + if self.token: + break # emit current token + else: + continue + result = self.token + self.token = '' + if self.posix and not quoted and result == '': + result = None + if self.debug > 1: + if result: + print "shlex: raw token=" + repr(result) + else: + print "shlex: raw token=EOF" + return result + + def sourcehook(self, newfile): + "Hook called on a filename to be sourced." + if newfile[0] == '"': + newfile = newfile[1:-1] + # This implements cpp-like semantics for relative-path inclusion. + if isinstance(self.infile, basestring) and not os.path.isabs(newfile): + newfile = os.path.join(os.path.dirname(self.infile), newfile) + return (newfile, open(newfile, "r")) + + def error_leader(self, infile=None, lineno=None): + "Emit a C-compiler-like, Emacs-friendly error-message leader." + if infile is None: + infile = self.infile + if lineno is None: + lineno = self.lineno + return "\"%s\", line %d: " % (infile, lineno) + + def __iter__(self): + return self + + def next(self): + token = self.get_token() + if token == self.eof: + raise StopIteration + return token + +def split(s, comments=False, posix=True): + lex = shlex(s, posix=posix) + lex.whitespace_split = True + if not comments: + lex.commenters = '' + return list(lex) + +if __name__ == '__main__': + if len(sys.argv) == 1: + lexer = shlex() + else: + file = sys.argv[1] + lexer = shlex(open(file), file) + while 1: + tt = lexer.get_token() + if tt: + print "Token: " + repr(tt) + else: + break diff --git a/third_party/stdlib/shutil.py b/third_party/stdlib/shutil.py new file mode 100644 index 00000000..26dcfde7 --- /dev/null +++ b/third_party/stdlib/shutil.py @@ -0,0 +1,564 @@ +"""Utility functions for copying and archiving files and directory trees. + +XXX The functions here don't copy the resource fork or other metadata on Mac. + +""" + +import os +import sys +import stat +from os.path import abspath +import fnmatch +import collections +import errno + +try: + from pwd import getpwnam +except ImportError: + getpwnam = None + +try: + from grp import getgrnam +except ImportError: + getgrnam = None + +__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2", + "copytree", "move", "rmtree", "Error", "SpecialFileError", + "ExecError", "make_archive", "get_archive_formats", + "register_archive_format", "unregister_archive_format", + "ignore_patterns"] + +class Error(EnvironmentError): + pass + +class SpecialFileError(EnvironmentError): + """Raised when trying to do a kind of operation (e.g. copying) which is + not supported on a special file (e.g. a named pipe)""" + +class ExecError(EnvironmentError): + """Raised when a command could not be executed""" + +try: + WindowsError +except NameError: + WindowsError = None + +def copyfileobj(fsrc, fdst, length=16*1024): + """copy data from file-like object fsrc to file-like object fdst""" + while 1: + buf = fsrc.read(length) + if not buf: + break + fdst.write(buf) + +def _samefile(src, dst): + # Macintosh, Unix. + if hasattr(os.path, 'samefile'): + try: + return os.path.samefile(src, dst) + except OSError: + return False + + # All other platforms: check for same pathname. + return (os.path.normcase(os.path.abspath(src)) == + os.path.normcase(os.path.abspath(dst))) + +def copyfile(src, dst): + """Copy data from src to dst""" + if _samefile(src, dst): + raise Error("`%s` and `%s` are the same file" % (src, dst)) + + for fn in [src, dst]: + try: + st = os.stat(fn) + except OSError: + # File most likely does not exist + pass + else: + # XXX What about other special files? (sockets, devices...) + if stat.S_ISFIFO(st.st_mode): + raise SpecialFileError("`%s` is a named pipe" % fn) + + with open(src, 'rb') as fsrc: + with open(dst, 'wb') as fdst: + copyfileobj(fsrc, fdst) + +def copymode(src, dst): + """Copy mode bits from src to dst""" + if hasattr(os, 'chmod'): + st = os.stat(src) + mode = stat.S_IMODE(st.st_mode) + os.chmod(dst, mode) + +def copystat(src, dst): + """Copy all stat info (mode bits, atime, mtime, flags) from src to dst""" + st = os.stat(src) + mode = stat.S_IMODE(st.st_mode) + if hasattr(os, 'utime'): + os.utime(dst, (st.st_atime, st.st_mtime)) + if hasattr(os, 'chmod'): + os.chmod(dst, mode) + if hasattr(os, 'chflags') and hasattr(st, 'st_flags'): + try: + os.chflags(dst, st.st_flags) + except OSError, why: + for err in 'EOPNOTSUPP', 'ENOTSUP': + if hasattr(errno, err) and why.errno == getattr(errno, err): + break + else: + raise + +def copy(src, dst): + """Copy data and mode bits ("cp src dst"). + + The destination may be a directory. + + """ + if os.path.isdir(dst): + dst = os.path.join(dst, os.path.basename(src)) + copyfile(src, dst) + copymode(src, dst) + +def copy2(src, dst): + """Copy data and all stat info ("cp -p src dst"). + + The destination may be a directory. + + """ + if os.path.isdir(dst): + dst = os.path.join(dst, os.path.basename(src)) + copyfile(src, dst) + copystat(src, dst) + +def ignore_patterns(*patterns): + """Function that can be used as copytree() ignore parameter. + + Patterns is a sequence of glob-style patterns + that are used to exclude files""" + def _ignore_patterns(path, names): + ignored_names = [] + for pattern in patterns: + ignored_names.extend(fnmatch.filter(names, pattern)) + return set(ignored_names) + return _ignore_patterns + +def copytree(src, dst, symlinks=False, ignore=None): + """Recursively copy a directory tree using copy2(). + + The destination directory must not already exist. + If exception(s) occur, an Error is raised with a list of reasons. + + If the optional symlinks flag is true, symbolic links in the + source tree result in symbolic links in the destination tree; if + it is false, the contents of the files pointed to by symbolic + links are copied. + + The optional ignore argument is a callable. If given, it + is called with the `src` parameter, which is the directory + being visited by copytree(), and `names` which is the list of + `src` contents, as returned by os.listdir(): + + callable(src, names) -> ignored_names + + Since copytree() is called recursively, the callable will be + called once for each directory that is copied. It returns a + list of names relative to the `src` directory that should + not be copied. + + XXX Consider this example code rather than the ultimate tool. + + """ + names = os.listdir(src) + if ignore is not None: + ignored_names = ignore(src, names) + else: + ignored_names = set() + + os.makedirs(dst) + errors = [] + for name in names: + if name in ignored_names: + continue + srcname = os.path.join(src, name) + dstname = os.path.join(dst, name) + try: + if symlinks and os.path.islink(srcname): + linkto = os.readlink(srcname) + os.symlink(linkto, dstname) + elif os.path.isdir(srcname): + copytree(srcname, dstname, symlinks, ignore) + else: + # Will raise a SpecialFileError for unsupported file types + copy2(srcname, dstname) + # catch the Error from the recursive copytree so that we can + # continue with other files + except Error, err: + errors.extend(err.args[0]) + except EnvironmentError, why: + errors.append((srcname, dstname, str(why))) + try: + copystat(src, dst) + except OSError, why: + if WindowsError is not None and isinstance(why, WindowsError): + # Copying file access times may fail on Windows + pass + else: + errors.append((src, dst, str(why))) + if errors: + raise Error, errors + +def rmtree(path, ignore_errors=False, onerror=None): + """Recursively delete a directory tree. + + If ignore_errors is set, errors are ignored; otherwise, if onerror + is set, it is called to handle the error with arguments (func, + path, exc_info) where func is os.listdir, os.remove, or os.rmdir; + path is the argument to that function that caused it to fail; and + exc_info is a tuple returned by sys.exc_info(). If ignore_errors + is false and onerror is None, an exception is raised. + + """ + if ignore_errors: + def onerror(*args): + pass + elif onerror is None: + def onerror(*args): + raise + try: + if os.path.islink(path): + # symlinks to directories are forbidden, see bug #1669 + raise OSError("Cannot call rmtree on a symbolic link") + except OSError: + onerror(os.path.islink, path, sys.exc_info()) + # can't continue even if onerror hook returns + return + names = [] + try: + names = os.listdir(path) + except os.error, err: + onerror(os.listdir, path, sys.exc_info()) + for name in names: + fullname = os.path.join(path, name) + try: + mode = os.lstat(fullname).st_mode + except os.error: + mode = 0 + if stat.S_ISDIR(mode): + rmtree(fullname, ignore_errors, onerror) + else: + try: + os.remove(fullname) + except os.error, err: + onerror(os.remove, fullname, sys.exc_info()) + try: + os.rmdir(path) + except os.error: + onerror(os.rmdir, path, sys.exc_info()) + + +def _basename(path): + # A basename() variant which first strips the trailing slash, if present. + # Thus we always get the last component of the path, even for directories. + sep = os.path.sep + (os.path.altsep or '') + return os.path.basename(path.rstrip(sep)) + +def move(src, dst): + """Recursively move a file or directory to another location. This is + similar to the Unix "mv" command. + + If the destination is a directory or a symlink to a directory, the source + is moved inside the directory. The destination path must not already + exist. + + If the destination already exists but is not a directory, it may be + overwritten depending on os.rename() semantics. + + If the destination is on our current filesystem, then rename() is used. + Otherwise, src is copied to the destination and then removed. + A lot more could be done here... A look at a mv.c shows a lot of + the issues this implementation glosses over. + + """ + real_dst = dst + if os.path.isdir(dst): + if _samefile(src, dst): + # We might be on a case insensitive filesystem, + # perform the rename anyway. + os.rename(src, dst) + return + + real_dst = os.path.join(dst, _basename(src)) + if os.path.exists(real_dst): + raise Error, "Destination path '%s' already exists" % real_dst + try: + os.rename(src, real_dst) + except OSError: + if os.path.isdir(src): + if _destinsrc(src, dst): + raise Error, "Cannot move a directory '%s' into itself '%s'." % (src, dst) + copytree(src, real_dst, symlinks=True) + rmtree(src) + else: + copy2(src, real_dst) + os.unlink(src) + +def _destinsrc(src, dst): + src = abspath(src) + dst = abspath(dst) + if not src.endswith(os.path.sep): + src += os.path.sep + if not dst.endswith(os.path.sep): + dst += os.path.sep + return dst.startswith(src) + +def _get_gid(name): + """Returns a gid, given a group name.""" + if getgrnam is None or name is None: + return None + try: + result = getgrnam(name) + except KeyError: + result = None + if result is not None: + return result[2] + return None + +def _get_uid(name): + """Returns an uid, given a user name.""" + if getpwnam is None or name is None: + return None + try: + result = getpwnam(name) + except KeyError: + result = None + if result is not None: + return result[2] + return None + +def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, + owner=None, group=None, logger=None): + """Create a (possibly compressed) tar file from all the files under + 'base_dir'. + + 'compress' must be "gzip" (the default), "bzip2", or None. + + 'owner' and 'group' can be used to define an owner and a group for the + archive that is being built. If not provided, the current owner and group + will be used. + + The output tar file will be named 'base_name' + ".tar", possibly plus + the appropriate compression extension (".gz", or ".bz2"). + + Returns the output filename. + """ + tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''} + compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'} + + # flags for compression program, each element of list will be an argument + if compress is not None and compress not in compress_ext.keys(): + raise ValueError, \ + ("bad value for 'compress': must be None, 'gzip' or 'bzip2'") + + archive_name = base_name + '.tar' + compress_ext.get(compress, '') + archive_dir = os.path.dirname(archive_name) + + if archive_dir and not os.path.exists(archive_dir): + if logger is not None: + logger.info("creating %s", archive_dir) + if not dry_run: + os.makedirs(archive_dir) + + + # creating the tarball + import tarfile # late import so Python build itself doesn't break + + if logger is not None: + logger.info('Creating tar archive') + + uid = _get_uid(owner) + gid = _get_gid(group) + + def _set_uid_gid(tarinfo): + if gid is not None: + tarinfo.gid = gid + tarinfo.gname = group + if uid is not None: + tarinfo.uid = uid + tarinfo.uname = owner + return tarinfo + + if not dry_run: + tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) + try: + tar.add(base_dir, filter=_set_uid_gid) + finally: + tar.close() + + return archive_name + +def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False): + # XXX see if we want to keep an external call here + if verbose: + zipoptions = "-r" + else: + zipoptions = "-rq" + from distutils.errors import DistutilsExecError + from distutils.spawn import spawn + try: + spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run) + except DistutilsExecError: + # XXX really should distinguish between "couldn't find + # external 'zip' command" and "zip failed". + raise ExecError, \ + ("unable to create zip file '%s': " + "could neither import the 'zipfile' module nor " + "find a standalone zip utility") % zip_filename + +def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None): + """Create a zip file from all the files under 'base_dir'. + + The output zip file will be named 'base_name' + ".zip". Uses either the + "zipfile" Python module (if available) or the InfoZIP "zip" utility + (if installed and found on the default search path). If neither tool is + available, raises ExecError. Returns the name of the output zip + file. + """ + zip_filename = base_name + ".zip" + archive_dir = os.path.dirname(base_name) + + if archive_dir and not os.path.exists(archive_dir): + if logger is not None: + logger.info("creating %s", archive_dir) + if not dry_run: + os.makedirs(archive_dir) + + # If zipfile module is not available, try spawning an external 'zip' + # command. + try: + import zipfile + except ImportError: + zipfile = None + + if zipfile is None: + _call_external_zip(base_dir, zip_filename, verbose, dry_run) + else: + if logger is not None: + logger.info("creating '%s' and adding '%s' to it", + zip_filename, base_dir) + + if not dry_run: + with zipfile.ZipFile(zip_filename, "w", + compression=zipfile.ZIP_DEFLATED) as zf: + path = os.path.normpath(base_dir) + zf.write(path, path) + if logger is not None: + logger.info("adding '%s'", path) + for dirpath, dirnames, filenames in os.walk(base_dir): + for name in sorted(dirnames): + path = os.path.normpath(os.path.join(dirpath, name)) + zf.write(path, path) + if logger is not None: + logger.info("adding '%s'", path) + for name in filenames: + path = os.path.normpath(os.path.join(dirpath, name)) + if os.path.isfile(path): + zf.write(path, path) + if logger is not None: + logger.info("adding '%s'", path) + + return zip_filename + +_ARCHIVE_FORMATS = { + 'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), + 'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"), + 'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"), + 'zip': (_make_zipfile, [],"ZIP file") + } + +def get_archive_formats(): + """Returns a list of supported formats for archiving and unarchiving. + + Each element of the returned sequence is a tuple (name, description) + """ + formats = [(name, registry[2]) for name, registry in + _ARCHIVE_FORMATS.items()] + formats.sort() + return formats + +def register_archive_format(name, function, extra_args=None, description=''): + """Registers an archive format. + + name is the name of the format. function is the callable that will be + used to create archives. If provided, extra_args is a sequence of + (name, value) tuples that will be passed as arguments to the callable. + description can be provided to describe the format, and will be returned + by the get_archive_formats() function. + """ + if extra_args is None: + extra_args = [] + if not isinstance(function, collections.Callable): + raise TypeError('The %s object is not callable' % function) + if not isinstance(extra_args, (tuple, list)): + raise TypeError('extra_args needs to be a sequence') + for element in extra_args: + if not isinstance(element, (tuple, list)) or len(element) !=2 : + raise TypeError('extra_args elements are : (arg_name, value)') + + _ARCHIVE_FORMATS[name] = (function, extra_args, description) + +def unregister_archive_format(name): + del _ARCHIVE_FORMATS[name] + +def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, + dry_run=0, owner=None, group=None, logger=None): + """Create an archive file (eg. zip or tar). + + 'base_name' is the name of the file to create, minus any format-specific + extension; 'format' is the archive format: one of "zip", "tar", "bztar" + or "gztar". + + 'root_dir' is a directory that will be the root directory of the + archive; ie. we typically chdir into 'root_dir' before creating the + archive. 'base_dir' is the directory where we start archiving from; + ie. 'base_dir' will be the common prefix of all files and + directories in the archive. 'root_dir' and 'base_dir' both default + to the current directory. Returns the name of the archive file. + + 'owner' and 'group' are used when creating a tar archive. By default, + uses the current owner and group. + """ + save_cwd = os.getcwd() + if root_dir is not None: + if logger is not None: + logger.debug("changing into '%s'", root_dir) + base_name = os.path.abspath(base_name) + if not dry_run: + os.chdir(root_dir) + + if base_dir is None: + base_dir = os.curdir + + kwargs = {'dry_run': dry_run, 'logger': logger} + + try: + format_info = _ARCHIVE_FORMATS[format] + except KeyError: + raise ValueError, "unknown archive format '%s'" % format + + func = format_info[0] + for arg, val in format_info[1]: + kwargs[arg] = val + + if format != 'zip': + kwargs['owner'] = owner + kwargs['group'] = group + + try: + filename = func(base_name, base_dir, **kwargs) + finally: + if root_dir is not None: + if logger is not None: + logger.debug("changing back to '%s'", save_cwd) + os.chdir(save_cwd) + + return filename diff --git a/third_party/stdlib/socket.py b/third_party/stdlib/socket.py new file mode 100644 index 00000000..437634cc --- /dev/null +++ b/third_party/stdlib/socket.py @@ -0,0 +1,577 @@ +# Wrapper module for _socket, providing some additional facilities +# implemented in Python. + +"""\ +This module provides socket operations and some related functions. +On Unix, it supports IP (Internet Protocol) and Unix domain sockets. +On other systems, it only supports IP. Functions specific for a +socket are available as methods of the socket object. + +Functions: + +socket() -- create a new socket object +socketpair() -- create a pair of new socket objects [*] +fromfd() -- create a socket object from an open file descriptor [*] +gethostname() -- return the current hostname +gethostbyname() -- map a hostname to its IP number +gethostbyaddr() -- map an IP number or hostname to DNS info +getservbyname() -- map a service name and a protocol name to a port number +getprotobyname() -- map a protocol name (e.g. 'tcp') to a number +ntohs(), ntohl() -- convert 16, 32 bit int from network to host byte order +htons(), htonl() -- convert 16, 32 bit int from host to network byte order +inet_aton() -- convert IP addr string (123.45.67.89) to 32-bit packed format +inet_ntoa() -- convert 32-bit packed format IP to string (123.45.67.89) +ssl() -- secure socket layer support (only available if configured) +socket.getdefaulttimeout() -- get the default timeout value +socket.setdefaulttimeout() -- set the default timeout value +create_connection() -- connects to an address, with an optional timeout and + optional source address. + + [*] not available on all platforms! + +Special objects: + +SocketType -- type object for socket objects +error -- exception raised for I/O errors +has_ipv6 -- boolean value indicating if IPv6 is supported + +Integer constants: + +AF_INET, AF_UNIX -- socket domains (first argument to socket() call) +SOCK_STREAM, SOCK_DGRAM, SOCK_RAW -- socket types (second argument) + +Many other constants may be defined; these may be used in calls to +the setsockopt() and getsockopt() methods. +""" + +import _socket +from _socket import * +from functools import partial +from types import MethodType + +try: + import _ssl +except ImportError: + # no SSL support + pass +else: + def ssl(sock, keyfile=None, certfile=None): + # we do an internal import here because the ssl + # module imports the socket module + import ssl as _realssl + warnings.warn("socket.ssl() is deprecated. Use ssl.wrap_socket() instead.", + DeprecationWarning, stacklevel=2) + return _realssl.sslwrap_simple(sock, keyfile, certfile) + + # we need to import the same constants we used to... + from _ssl import SSLError as sslerror + from _ssl import \ + RAND_add, \ + RAND_status, \ + SSL_ERROR_ZERO_RETURN, \ + SSL_ERROR_WANT_READ, \ + SSL_ERROR_WANT_WRITE, \ + SSL_ERROR_WANT_X509_LOOKUP, \ + SSL_ERROR_SYSCALL, \ + SSL_ERROR_SSL, \ + SSL_ERROR_WANT_CONNECT, \ + SSL_ERROR_EOF, \ + SSL_ERROR_INVALID_ERROR_CODE + try: + from _ssl import RAND_egd + except ImportError: + # LibreSSL does not provide RAND_egd + pass + +import os, sys, warnings + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +try: + import errno +except ImportError: + errno = None +EBADF = getattr(errno, 'EBADF', 9) +EINTR = getattr(errno, 'EINTR', 4) + +__all__ = ["getfqdn", "create_connection"] +__all__.extend(os._get_exports_list(_socket)) + + +_realsocket = socket + +# WSA error codes +if sys.platform.lower().startswith("win"): + errorTab = {} + errorTab[10004] = "The operation was interrupted." + errorTab[10009] = "A bad file handle was passed." + errorTab[10013] = "Permission denied." + errorTab[10014] = "A fault occurred on the network??" # WSAEFAULT + errorTab[10022] = "An invalid operation was attempted." + errorTab[10035] = "The socket operation would block" + errorTab[10036] = "A blocking operation is already in progress." + errorTab[10048] = "The network address is in use." + errorTab[10054] = "The connection has been reset." + errorTab[10058] = "The network has been shut down." + errorTab[10060] = "The operation timed out." + errorTab[10061] = "Connection refused." + errorTab[10063] = "The name is too long." + errorTab[10064] = "The host is down." + errorTab[10065] = "The host is unreachable." + __all__.append("errorTab") + + + +def getfqdn(name=''): + """Get fully qualified domain name from name. + + An empty argument is interpreted as meaning the local host. + + First the hostname returned by gethostbyaddr() is checked, then + possibly existing aliases. In case no FQDN is available, hostname + from gethostname() is returned. + """ + name = name.strip() + if not name or name == '0.0.0.0': + name = gethostname() + try: + hostname, aliases, ipaddrs = gethostbyaddr(name) + except error: + pass + else: + aliases.insert(0, hostname) + for name in aliases: + if '.' in name: + break + else: + name = hostname + return name + + +_socketmethods = ( + 'bind', 'connect', 'connect_ex', 'fileno', 'listen', + 'getpeername', 'getsockname', 'getsockopt', 'setsockopt', + 'sendall', 'setblocking', + 'settimeout', 'gettimeout', 'shutdown') + +if os.name == "nt": + _socketmethods = _socketmethods + ('ioctl',) + +if sys.platform == "riscos": + _socketmethods = _socketmethods + ('sleeptaskw',) + +# All the method names that must be delegated to either the real socket +# object or the _closedsocket object. +_delegate_methods = ("recv", "recvfrom", "recv_into", "recvfrom_into", + "send", "sendto") + +class _closedsocket(object): + __slots__ = [] + def _dummy(*args): + raise error(EBADF, 'Bad file descriptor') + # All _delegate_methods must also be initialized here. + send = recv = recv_into = sendto = recvfrom = recvfrom_into = _dummy + __getattr__ = _dummy + +# Wrapper around platform socket objects. This implements +# a platform-independent dup() functionality. The +# implementation currently relies on reference counting +# to close the underlying socket object. +class _socketobject(object): + + __doc__ = _realsocket.__doc__ + + __slots__ = ["_sock", "__weakref__"] + list(_delegate_methods) + + def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, _sock=None): + if _sock is None: + _sock = _realsocket(family, type, proto) + self._sock = _sock + for method in _delegate_methods: + setattr(self, method, getattr(_sock, method)) + + def close(self, _closedsocket=_closedsocket, + _delegate_methods=_delegate_methods, setattr=setattr): + # This function should not reference any globals. See issue #808164. + self._sock = _closedsocket() + dummy = self._sock._dummy + for method in _delegate_methods: + setattr(self, method, dummy) + close.__doc__ = _realsocket.close.__doc__ + + def accept(self): + sock, addr = self._sock.accept() + return _socketobject(_sock=sock), addr + accept.__doc__ = _realsocket.accept.__doc__ + + def dup(self): + """dup() -> socket object + + Return a new socket object connected to the same system resource.""" + return _socketobject(_sock=self._sock) + + def makefile(self, mode='r', bufsize=-1): + """makefile([mode[, bufsize]]) -> file object + + Return a regular file object corresponding to the socket. The mode + and bufsize arguments are as for the built-in open() function.""" + return _fileobject(self._sock, mode, bufsize) + + family = property(lambda self: self._sock.family, doc="the socket family") + type = property(lambda self: self._sock.type, doc="the socket type") + proto = property(lambda self: self._sock.proto, doc="the socket protocol") + +def meth(name,self,*args): + return getattr(self._sock,name)(*args) + +for _m in _socketmethods: + p = partial(meth,_m) + p.__name__ = _m + p.__doc__ = getattr(_realsocket,_m).__doc__ + m = MethodType(p,None,_socketobject) + setattr(_socketobject,_m,m) + +socket = SocketType = _socketobject + +class _fileobject(object): + """Faux file object attached to a socket object.""" + + default_bufsize = 8192 + name = "" + + __slots__ = ["mode", "bufsize", "softspace", + # "closed" is a property, see below + "_sock", "_rbufsize", "_wbufsize", "_rbuf", "_wbuf", "_wbuf_len", + "_close"] + + def __init__(self, sock, mode='rb', bufsize=-1, close=False): + self._sock = sock + self.mode = mode # Not actually used in this version + if bufsize < 0: + bufsize = self.default_bufsize + self.bufsize = bufsize + self.softspace = False + # _rbufsize is the suggested recv buffer size. It is *strictly* + # obeyed within readline() for recv calls. If it is larger than + # default_bufsize it will be used for recv calls within read(). + if bufsize == 0: + self._rbufsize = 1 + elif bufsize == 1: + self._rbufsize = self.default_bufsize + else: + self._rbufsize = bufsize + self._wbufsize = bufsize + # We use StringIO for the read buffer to avoid holding a list + # of variously sized string objects which have been known to + # fragment the heap due to how they are malloc()ed and often + # realloc()ed down much smaller than their original allocation. + self._rbuf = StringIO() + self._wbuf = [] # A list of strings + self._wbuf_len = 0 + self._close = close + + def _getclosed(self): + return self._sock is None + closed = property(_getclosed, doc="True if the file is closed") + + def close(self): + try: + if self._sock: + self.flush() + finally: + if self._close: + self._sock.close() + self._sock = None + + def __del__(self): + try: + self.close() + except: + # close() may fail if __init__ didn't complete + pass + + def flush(self): + if self._wbuf: + data = "".join(self._wbuf) + self._wbuf = [] + self._wbuf_len = 0 + buffer_size = max(self._rbufsize, self.default_bufsize) + data_size = len(data) + write_offset = 0 + view = memoryview(data) + try: + while write_offset < data_size: + self._sock.sendall(view[write_offset:write_offset+buffer_size]) + write_offset += buffer_size + finally: + if write_offset < data_size: + remainder = data[write_offset:] + del view, data # explicit free + self._wbuf.append(remainder) + self._wbuf_len = len(remainder) + + def fileno(self): + return self._sock.fileno() + + def write(self, data): + data = str(data) # XXX Should really reject non-string non-buffers + if not data: + return + self._wbuf.append(data) + self._wbuf_len += len(data) + if (self._wbufsize == 0 or + (self._wbufsize == 1 and '\n' in data) or + (self._wbufsize > 1 and self._wbuf_len >= self._wbufsize)): + self.flush() + + def writelines(self, list): + # XXX We could do better here for very long lists + # XXX Should really reject non-string non-buffers + lines = filter(None, map(str, list)) + self._wbuf_len += sum(map(len, lines)) + self._wbuf.extend(lines) + if (self._wbufsize <= 1 or + self._wbuf_len >= self._wbufsize): + self.flush() + + def read(self, size=-1): + # Use max, disallow tiny reads in a loop as they are very inefficient. + # We never leave read() with any leftover data from a new recv() call + # in our internal buffer. + rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned by + # recv() minimizes memory usage and fragmentation that occurs when + # rbufsize is large compared to the typical return value of recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end + if size < 0: + # Read until EOF + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(rbufsize) + except error, e: + if e.args[0] == EINTR: + continue + raise + if not data: + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and return. + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. This avoids + # fragmentation issues on many platforms. + try: + data = self._sock.recv(left) + except error, e: + if e.args[0] == EINTR: + continue + raise + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. + return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + return buf.getvalue() + + def readline(self, size=-1): + buf = self._rbuf + buf.seek(0, 2) # seek end + if buf.tell() > 0: + # check if we already have it in our buffer + buf.seek(0) + bline = buf.readline(size) + if bline.endswith('\n') or len(bline) == size: + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return bline + del bline + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + buf.seek(0) + buffers = [buf.read()] + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + data = None + recv = self._sock.recv + while True: + try: + while data != "\n": + data = recv(1) + if not data: + break + buffers.append(data) + except error, e: + # The try..except to catch EINTR was moved outside the + # recv loop to avoid the per byte overhead. + if e.args[0] == EINTR: + continue + raise + break + return "".join(buffers) + + buf.seek(0, 2) # seek end + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(self._rbufsize) + except error, e: + if e.args[0] == EINTR: + continue + raise + if not data: + break + nl = data.find('\n') + if nl >= 0: + nl += 1 + buf.write(data[:nl]) + self._rbuf.write(data[nl:]) + del data + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or \n or EOF seen, whichever comes first + buf.seek(0, 2) # seek end + buf_len = buf.tell() + if buf_len >= size: + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO() + self._rbuf.write(buf.read()) + return rv + self._rbuf = StringIO() # reset _rbuf. we consume it via buf. + while True: + try: + data = self._sock.recv(self._rbufsize) + except error, e: + if e.args[0] == EINTR: + continue + raise + if not data: + break + left = size - buf_len + # did we just receive a newline? + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + # save the excess data to _rbuf + self._rbuf.write(data[nl:]) + if buf_len: + buf.write(data[:nl]) + break + else: + # Shortcut. Avoid data copy through buf when returning + # a substring of our first recv(). + return data[:nl] + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid data copy through buf when + # returning exactly all of our first recv(). + return data + if n >= left: + buf.write(data[:left]) + self._rbuf.write(data[left:]) + break + buf.write(data) + buf_len += n + #assert buf_len == buf.tell() + return buf.getvalue() + + def readlines(self, sizehint=0): + total = 0 + list = [] + while True: + line = self.readline() + if not line: + break + list.append(line) + total += len(line) + if sizehint and total >= sizehint: + break + return list + + # Iterator protocols + + def __iter__(self): + return self + + def next(self): + line = self.readline() + if not line: + raise StopIteration + return line + +_GLOBAL_DEFAULT_TIMEOUT = object() + +def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT, + source_address=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + A host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in getaddrinfo(host, port, 0, SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket(af, socktype, proto) + if timeout is not _GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise error("getaddrinfo returns an empty list") diff --git a/third_party/stdlib/tarfile.py b/third_party/stdlib/tarfile.py new file mode 100644 index 00000000..be688005 --- /dev/null +++ b/third_party/stdlib/tarfile.py @@ -0,0 +1,2628 @@ +# -*- coding: iso-8859-1 -*- +#------------------------------------------------------------------- +# tarfile.py +#------------------------------------------------------------------- +# Copyright (C) 2002 Lars Gust�bel +# All rights reserved. +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +"""Read from and write to tar format archives. +""" + +__version__ = "$Revision: 85213 $" +# $Source$ + +version = "0.9.0" +__author__ = "Lars Gust�bel (lars@gustaebel.de)" +__date__ = "$Date$" +__cvsid__ = "$Id$" +__credits__ = "Gustavo Niemeyer, Niels Gust�bel, Richard Townsend." + +#--------- +# Imports +#--------- +from __builtin__ import open as bltn_open +import sys +import os +import shutil +import stat +import errno +import time +import struct +import copy +import re +import operator + +try: + import grp, pwd +except ImportError: + grp = pwd = None + +# from tarfile import * +__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] + +#--------------------------------------------------------- +# tar constants +#--------------------------------------------------------- +NUL = "\0" # the null character +BLOCKSIZE = 512 # length of processing blocks +RECORDSIZE = BLOCKSIZE * 20 # length of records +GNU_MAGIC = "ustar \0" # magic gnu tar string +POSIX_MAGIC = "ustar\x0000" # magic posix tar string + +LENGTH_NAME = 100 # maximum length of a filename +LENGTH_LINK = 100 # maximum length of a linkname +LENGTH_PREFIX = 155 # maximum length of the prefix field + +REGTYPE = "0" # regular file +AREGTYPE = "\0" # regular file +LNKTYPE = "1" # link (inside tarfile) +SYMTYPE = "2" # symbolic link +CHRTYPE = "3" # character special device +BLKTYPE = "4" # block special device +DIRTYPE = "5" # directory +FIFOTYPE = "6" # fifo special device +CONTTYPE = "7" # contiguous file + +GNUTYPE_LONGNAME = "L" # GNU tar longname +GNUTYPE_LONGLINK = "K" # GNU tar longlink +GNUTYPE_SPARSE = "S" # GNU tar sparse file + +XHDTYPE = "x" # POSIX.1-2001 extended header +XGLTYPE = "g" # POSIX.1-2001 global header +SOLARIS_XHDTYPE = "X" # Solaris extended header + +USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format +GNU_FORMAT = 1 # GNU tar format +PAX_FORMAT = 2 # POSIX.1-2001 (pax) format +DEFAULT_FORMAT = GNU_FORMAT + +#--------------------------------------------------------- +# tarfile constants +#--------------------------------------------------------- +# File types that tarfile supports: +SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, + SYMTYPE, DIRTYPE, FIFOTYPE, + CONTTYPE, CHRTYPE, BLKTYPE, + GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, + GNUTYPE_SPARSE) + +# File types that will be treated as a regular file. +REGULAR_TYPES = (REGTYPE, AREGTYPE, + CONTTYPE, GNUTYPE_SPARSE) + +# File types that are part of the GNU tar format. +GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, + GNUTYPE_SPARSE) + +# Fields from a pax header that override a TarInfo attribute. +PAX_FIELDS = ("path", "linkpath", "size", "mtime", + "uid", "gid", "uname", "gname") + +# Fields in a pax header that are numbers, all other fields +# are treated as strings. +PAX_NUMBER_FIELDS = { + "atime": float, + "ctime": float, + "mtime": float, + "uid": int, + "gid": int, + "size": int +} + +#--------------------------------------------------------- +# Bits used in the mode field, values in octal. +#--------------------------------------------------------- +S_IFLNK = 0120000 # symbolic link +S_IFREG = 0100000 # regular file +S_IFBLK = 0060000 # block device +S_IFDIR = 0040000 # directory +S_IFCHR = 0020000 # character device +S_IFIFO = 0010000 # fifo + +TSUID = 04000 # set UID on execution +TSGID = 02000 # set GID on execution +TSVTX = 01000 # reserved + +TUREAD = 0400 # read by owner +TUWRITE = 0200 # write by owner +TUEXEC = 0100 # execute/search by owner +TGREAD = 0040 # read by group +TGWRITE = 0020 # write by group +TGEXEC = 0010 # execute/search by group +TOREAD = 0004 # read by other +TOWRITE = 0002 # write by other +TOEXEC = 0001 # execute/search by other + +#--------------------------------------------------------- +# initialization +#--------------------------------------------------------- +ENCODING = sys.getfilesystemencoding() +if ENCODING is None: + ENCODING = sys.getdefaultencoding() + +#--------------------------------------------------------- +# Some useful functions +#--------------------------------------------------------- + +def stn(s, length): + """Convert a python string to a null-terminated string buffer. + """ + return s[:length] + (length - len(s)) * NUL + +def nts(s): + """Convert a null-terminated string field to a python string. + """ + # Use the string up to the first null char. + p = s.find("\0") + if p == -1: + return s + return s[:p] + +def nti(s): + """Convert a number field to a python number. + """ + # There are two possible encodings for a number field, see + # itn() below. + if s[0] != chr(0200): + try: + n = int(nts(s).strip() or "0", 8) + except ValueError: + raise InvalidHeaderError("invalid header") + else: + n = 0L + for i in xrange(len(s) - 1): + n <<= 8 + n += ord(s[i + 1]) + return n + +def itn(n, digits=8, format=DEFAULT_FORMAT): + """Convert a python number to a number field. + """ + # POSIX 1003.1-1988 requires numbers to be encoded as a string of + # octal digits followed by a null-byte, this allows values up to + # (8**(digits-1))-1. GNU tar allows storing numbers greater than + # that if necessary. A leading 0200 byte indicates this particular + # encoding, the following digits-1 bytes are a big-endian + # representation. This allows values up to (256**(digits-1))-1. + if 0 <= n < 8 ** (digits - 1): + s = "%0*o" % (digits - 1, n) + NUL + else: + if format != GNU_FORMAT or n >= 256 ** (digits - 1): + raise ValueError("overflow in number field") + + if n < 0: + # XXX We mimic GNU tar's behaviour with negative numbers, + # this could raise OverflowError. + n = struct.unpack("L", struct.pack("l", n))[0] + + s = "" + for i in xrange(digits - 1): + s = chr(n & 0377) + s + n >>= 8 + s = chr(0200) + s + return s + +def uts(s, encoding, errors): + """Convert a unicode object to a string. + """ + if errors == "utf-8": + # An extra error handler similar to the -o invalid=UTF-8 option + # in POSIX.1-2001. Replace untranslatable characters with their + # UTF-8 representation. + try: + return s.encode(encoding, "strict") + except UnicodeEncodeError: + x = [] + for c in s: + try: + x.append(c.encode(encoding, "strict")) + except UnicodeEncodeError: + x.append(c.encode("utf8")) + return "".join(x) + else: + return s.encode(encoding, errors) + +def calc_chksums(buf): + """Calculate the checksum for a member's header by summing up all + characters except for the chksum field which is treated as if + it was filled with spaces. According to the GNU tar sources, + some tars (Sun and NeXT) calculate chksum with signed char, + which will be different if there are chars in the buffer with + the high bit set. So we calculate two checksums, unsigned and + signed. + """ + unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512])) + signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512])) + return unsigned_chksum, signed_chksum + +def copyfileobj(src, dst, length=None): + """Copy length bytes from fileobj src to fileobj dst. + If length is None, copy the entire content. + """ + if length == 0: + return + if length is None: + shutil.copyfileobj(src, dst) + return + + BUFSIZE = 16 * 1024 + blocks, remainder = divmod(length, BUFSIZE) + for b in xrange(blocks): + buf = src.read(BUFSIZE) + if len(buf) < BUFSIZE: + raise IOError("end of file reached") + dst.write(buf) + + if remainder != 0: + buf = src.read(remainder) + if len(buf) < remainder: + raise IOError("end of file reached") + dst.write(buf) + return + +filemode_table = ( + ((S_IFLNK, "l"), + (S_IFREG, "-"), + (S_IFBLK, "b"), + (S_IFDIR, "d"), + (S_IFCHR, "c"), + (S_IFIFO, "p")), + + ((TUREAD, "r"),), + ((TUWRITE, "w"),), + ((TUEXEC|TSUID, "s"), + (TSUID, "S"), + (TUEXEC, "x")), + + ((TGREAD, "r"),), + ((TGWRITE, "w"),), + ((TGEXEC|TSGID, "s"), + (TSGID, "S"), + (TGEXEC, "x")), + + ((TOREAD, "r"),), + ((TOWRITE, "w"),), + ((TOEXEC|TSVTX, "t"), + (TSVTX, "T"), + (TOEXEC, "x")) +) + +def filemode(mode): + """Convert a file's mode to a string of the form + -rwxrwxrwx. + Used by TarFile.list() + """ + perm = [] + for table in filemode_table: + for bit, char in table: + if mode & bit == bit: + perm.append(char) + break + else: + perm.append("-") + return "".join(perm) + +class TarError(Exception): + """Base exception.""" + pass +class ExtractError(TarError): + """General exception for extract errors.""" + pass +class ReadError(TarError): + """Exception for unreadable tar archives.""" + pass +class CompressionError(TarError): + """Exception for unavailable compression methods.""" + pass +class StreamError(TarError): + """Exception for unsupported operations on stream-like TarFiles.""" + pass +class HeaderError(TarError): + """Base exception for header errors.""" + pass +class EmptyHeaderError(HeaderError): + """Exception for empty headers.""" + pass +class TruncatedHeaderError(HeaderError): + """Exception for truncated headers.""" + pass +class EOFHeaderError(HeaderError): + """Exception for end of file headers.""" + pass +class InvalidHeaderError(HeaderError): + """Exception for invalid headers.""" + pass +class SubsequentHeaderError(HeaderError): + """Exception for missing and invalid extended headers.""" + pass + +#--------------------------- +# internal stream interface +#--------------------------- +class _LowLevelFile: + """Low-level file object. Supports reading and writing. + It is used instead of a regular file object for streaming + access. + """ + + def __init__(self, name, mode): + mode = { + "r": os.O_RDONLY, + "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, + }[mode] + if hasattr(os, "O_BINARY"): + mode |= os.O_BINARY + self.fd = os.open(name, mode, 0666) + + def close(self): + os.close(self.fd) + + def read(self, size): + return os.read(self.fd, size) + + def write(self, s): + os.write(self.fd, s) + +class _Stream: + """Class that serves as an adapter between TarFile and + a stream-like object. The stream-like object only + needs to have a read() or write() method and is accessed + blockwise. Use of gzip or bzip2 compression is possible. + A stream-like object could be for example: sys.stdin, + sys.stdout, a socket, a tape device etc. + + _Stream is intended to be used only internally. + """ + + def __init__(self, name, mode, comptype, fileobj, bufsize): + """Construct a _Stream object. + """ + self._extfileobj = True + if fileobj is None: + fileobj = _LowLevelFile(name, mode) + self._extfileobj = False + + if comptype == '*': + # Enable transparent compression detection for the + # stream interface + fileobj = _StreamProxy(fileobj) + comptype = fileobj.getcomptype() + + self.name = name or "" + self.mode = mode + self.comptype = comptype + self.fileobj = fileobj + self.bufsize = bufsize + self.buf = "" + self.pos = 0L + self.closed = False + + try: + if comptype == "gz": + try: + import zlib + except ImportError: + raise CompressionError("zlib module is not available") + self.zlib = zlib + self.crc = zlib.crc32("") & 0xffffffffL + if mode == "r": + self._init_read_gz() + else: + self._init_write_gz() + + elif comptype == "bz2": + try: + import bz2 + except ImportError: + raise CompressionError("bz2 module is not available") + if mode == "r": + self.dbuf = "" + self.cmp = bz2.BZ2Decompressor() + else: + self.cmp = bz2.BZ2Compressor() + except: + if not self._extfileobj: + self.fileobj.close() + self.closed = True + raise + + def __del__(self): + if hasattr(self, "closed") and not self.closed: + self.close() + + def _init_write_gz(self): + """Initialize for writing with gzip compression. + """ + self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, + -self.zlib.MAX_WBITS, + self.zlib.DEF_MEM_LEVEL, + 0) + timestamp = struct.pack(" self.bufsize: + self.fileobj.write(self.buf[:self.bufsize]) + self.buf = self.buf[self.bufsize:] + + def close(self): + """Close the _Stream object. No operation should be + done on it afterwards. + """ + if self.closed: + return + + self.closed = True + try: + if self.mode == "w" and self.comptype != "tar": + self.buf += self.cmp.flush() + + if self.mode == "w" and self.buf: + self.fileobj.write(self.buf) + self.buf = "" + if self.comptype == "gz": + # The native zlib crc is an unsigned 32-bit integer, but + # the Python wrapper implicitly casts that to a signed C + # long. So, on a 32-bit box self.crc may "look negative", + # while the same crc on a 64-bit box may "look positive". + # To avoid irksome warnings from the `struct` module, force + # it to look positive on all boxes. + self.fileobj.write(struct.pack("= 0: + blocks, remainder = divmod(pos - self.pos, self.bufsize) + for i in xrange(blocks): + self.read(self.bufsize) + self.read(remainder) + else: + raise StreamError("seeking backwards is not allowed") + return self.pos + + def read(self, size=None): + """Return the next size number of bytes from the stream. + If size is not defined, return all bytes of the stream + up to EOF. + """ + if size is None: + t = [] + while True: + buf = self._read(self.bufsize) + if not buf: + break + t.append(buf) + buf = "".join(t) + else: + buf = self._read(size) + self.pos += len(buf) + return buf + + def _read(self, size): + """Return size bytes from the stream. + """ + if self.comptype == "tar": + return self.__read(size) + + c = len(self.dbuf) + t = [self.dbuf] + while c < size: + buf = self.__read(self.bufsize) + if not buf: + break + try: + buf = self.cmp.decompress(buf) + except IOError: + raise ReadError("invalid compressed data") + t.append(buf) + c += len(buf) + t = "".join(t) + self.dbuf = t[size:] + return t[:size] + + def __read(self, size): + """Return size bytes from stream. If internal buffer is empty, + read another block from the stream. + """ + c = len(self.buf) + t = [self.buf] + while c < size: + buf = self.fileobj.read(self.bufsize) + if not buf: + break + t.append(buf) + c += len(buf) + t = "".join(t) + self.buf = t[size:] + return t[:size] +# class _Stream + +class _StreamProxy(object): + """Small proxy class that enables transparent compression + detection for the Stream interface (mode 'r|*'). + """ + + def __init__(self, fileobj): + self.fileobj = fileobj + self.buf = self.fileobj.read(BLOCKSIZE) + + def read(self, size): + self.read = self.fileobj.read + return self.buf + + def getcomptype(self): + if self.buf.startswith("\037\213\010"): + return "gz" + if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY": + return "bz2" + return "tar" + + def close(self): + self.fileobj.close() +# class StreamProxy + +class _BZ2Proxy(object): + """Small proxy class that enables external file object + support for "r:bz2" and "w:bz2" modes. This is actually + a workaround for a limitation in bz2 module's BZ2File + class which (unlike gzip.GzipFile) has no support for + a file object argument. + """ + + blocksize = 16 * 1024 + + def __init__(self, fileobj, mode): + self.fileobj = fileobj + self.mode = mode + self.name = getattr(self.fileobj, "name", None) + self.init() + + def init(self): + import bz2 + self.pos = 0 + if self.mode == "r": + self.bz2obj = bz2.BZ2Decompressor() + self.fileobj.seek(0) + self.buf = "" + else: + self.bz2obj = bz2.BZ2Compressor() + + def read(self, size): + b = [self.buf] + x = len(self.buf) + while x < size: + raw = self.fileobj.read(self.blocksize) + if not raw: + break + data = self.bz2obj.decompress(raw) + b.append(data) + x += len(data) + self.buf = "".join(b) + + buf = self.buf[:size] + self.buf = self.buf[size:] + self.pos += len(buf) + return buf + + def seek(self, pos): + if pos < self.pos: + self.init() + self.read(pos - self.pos) + + def tell(self): + return self.pos + + def write(self, data): + self.pos += len(data) + raw = self.bz2obj.compress(data) + self.fileobj.write(raw) + + def close(self): + if self.mode == "w": + raw = self.bz2obj.flush() + self.fileobj.write(raw) +# class _BZ2Proxy + +#------------------------ +# Extraction file object +#------------------------ +class _FileInFile(object): + """A thin wrapper around an existing file object that + provides a part of its data as an individual file + object. + """ + + def __init__(self, fileobj, offset, size, sparse=None): + self.fileobj = fileobj + self.offset = offset + self.size = size + self.sparse = sparse + self.position = 0 + + def tell(self): + """Return the current file position. + """ + return self.position + + def seek(self, position): + """Seek to a position in the file. + """ + self.position = position + + def read(self, size=None): + """Read data from the file. + """ + if size is None: + size = self.size - self.position + else: + size = min(size, self.size - self.position) + + if self.sparse is None: + return self.readnormal(size) + else: + return self.readsparse(size) + + def __read(self, size): + buf = self.fileobj.read(size) + if len(buf) != size: + raise ReadError("unexpected end of data") + return buf + + def readnormal(self, size): + """Read operation for regular files. + """ + self.fileobj.seek(self.offset + self.position) + self.position += size + return self.__read(size) + + def readsparse(self, size): + """Read operation for sparse files. + """ + data = [] + while size > 0: + buf = self.readsparsesection(size) + if not buf: + break + size -= len(buf) + data.append(buf) + return "".join(data) + + def readsparsesection(self, size): + """Read a single section of a sparse file. + """ + section = self.sparse.find(self.position) + + if section is None: + return "" + + size = min(size, section.offset + section.size - self.position) + + if isinstance(section, _data): + realpos = section.realpos + self.position - section.offset + self.fileobj.seek(self.offset + realpos) + self.position += size + return self.__read(size) + else: + self.position += size + return NUL * size +#class _FileInFile + + +class ExFileObject(object): + """File-like object for reading an archive member. + Is returned by TarFile.extractfile(). + """ + blocksize = 1024 + + def __init__(self, tarfile, tarinfo): + self.fileobj = _FileInFile(tarfile.fileobj, + tarinfo.offset_data, + tarinfo.size, + getattr(tarinfo, "sparse", None)) + self.name = tarinfo.name + self.mode = "r" + self.closed = False + self.size = tarinfo.size + + self.position = 0 + self.buffer = "" + + def read(self, size=None): + """Read at most size bytes from the file. If size is not + present or None, read all data until EOF is reached. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + buf = "" + if self.buffer: + if size is None: + buf = self.buffer + self.buffer = "" + else: + buf = self.buffer[:size] + self.buffer = self.buffer[size:] + + if size is None: + buf += self.fileobj.read() + else: + buf += self.fileobj.read(size - len(buf)) + + self.position += len(buf) + return buf + + def readline(self, size=-1): + """Read one entire line from the file. If size is present + and non-negative, return a string with at most that + size, which may be an incomplete line. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + if "\n" in self.buffer: + pos = self.buffer.find("\n") + 1 + else: + buffers = [self.buffer] + while True: + buf = self.fileobj.read(self.blocksize) + buffers.append(buf) + if not buf or "\n" in buf: + self.buffer = "".join(buffers) + pos = self.buffer.find("\n") + 1 + if pos == 0: + # no newline found. + pos = len(self.buffer) + break + + if size != -1: + pos = min(size, pos) + + buf = self.buffer[:pos] + self.buffer = self.buffer[pos:] + self.position += len(buf) + return buf + + def readlines(self): + """Return a list with all remaining lines. + """ + result = [] + while True: + line = self.readline() + if not line: break + result.append(line) + return result + + def tell(self): + """Return the current file position. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + return self.position + + def seek(self, pos, whence=os.SEEK_SET): + """Seek to a position in the file. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + if whence == os.SEEK_SET: + self.position = min(max(pos, 0), self.size) + elif whence == os.SEEK_CUR: + if pos < 0: + self.position = max(self.position + pos, 0) + else: + self.position = min(self.position + pos, self.size) + elif whence == os.SEEK_END: + self.position = max(min(self.size + pos, self.size), 0) + else: + raise ValueError("Invalid argument") + + self.buffer = "" + self.fileobj.seek(self.position) + + def close(self): + """Close the file object. + """ + self.closed = True + + def __iter__(self): + """Get an iterator over the file's lines. + """ + while True: + line = self.readline() + if not line: + break + yield line +#class ExFileObject + +#------------------ +# Exported Classes +#------------------ +class TarInfo(object): + """Informational class which holds the details about an + archive member given by a tar header block. + TarInfo objects are returned by TarFile.getmember(), + TarFile.getmembers() and TarFile.gettarinfo() and are + usually created internally. + """ + + def __init__(self, name=""): + """Construct a TarInfo object. name is the optional name + of the member. + """ + self.name = name # member name + self.mode = 0644 # file permissions + self.uid = 0 # user id + self.gid = 0 # group id + self.size = 0 # file size + self.mtime = 0 # modification time + self.chksum = 0 # header checksum + self.type = REGTYPE # member type + self.linkname = "" # link name + self.uname = "" # user name + self.gname = "" # group name + self.devmajor = 0 # device major number + self.devminor = 0 # device minor number + + self.offset = 0 # the tar header starts here + self.offset_data = 0 # the file's data starts here + + self.pax_headers = {} # pax header information + + # In pax headers the "name" and "linkname" field are called + # "path" and "linkpath". + def _getpath(self): + return self.name + def _setpath(self, name): + self.name = name + path = property(_getpath, _setpath) + + def _getlinkpath(self): + return self.linkname + def _setlinkpath(self, linkname): + self.linkname = linkname + linkpath = property(_getlinkpath, _setlinkpath) + + def __repr__(self): + return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) + + def get_info(self, encoding, errors): + """Return the TarInfo's attributes as a dictionary. + """ + info = { + "name": self.name, + "mode": self.mode & 07777, + "uid": self.uid, + "gid": self.gid, + "size": self.size, + "mtime": self.mtime, + "chksum": self.chksum, + "type": self.type, + "linkname": self.linkname, + "uname": self.uname, + "gname": self.gname, + "devmajor": self.devmajor, + "devminor": self.devminor + } + + if info["type"] == DIRTYPE and not info["name"].endswith("/"): + info["name"] += "/" + + for key in ("name", "linkname", "uname", "gname"): + if type(info[key]) is unicode: + info[key] = info[key].encode(encoding, errors) + + return info + + def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"): + """Return a tar header as a string of 512 byte blocks. + """ + info = self.get_info(encoding, errors) + + if format == USTAR_FORMAT: + return self.create_ustar_header(info) + elif format == GNU_FORMAT: + return self.create_gnu_header(info) + elif format == PAX_FORMAT: + return self.create_pax_header(info, encoding, errors) + else: + raise ValueError("invalid format") + + def create_ustar_header(self, info): + """Return the object as a ustar header block. + """ + info["magic"] = POSIX_MAGIC + + if len(info["linkname"]) > LENGTH_LINK: + raise ValueError("linkname is too long") + + if len(info["name"]) > LENGTH_NAME: + info["prefix"], info["name"] = self._posix_split_name(info["name"]) + + return self._create_header(info, USTAR_FORMAT) + + def create_gnu_header(self, info): + """Return the object as a GNU header block sequence. + """ + info["magic"] = GNU_MAGIC + + buf = "" + if len(info["linkname"]) > LENGTH_LINK: + buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK) + + if len(info["name"]) > LENGTH_NAME: + buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME) + + return buf + self._create_header(info, GNU_FORMAT) + + def create_pax_header(self, info, encoding, errors): + """Return the object as a ustar header block. If it cannot be + represented this way, prepend a pax extended header sequence + with supplement information. + """ + info["magic"] = POSIX_MAGIC + pax_headers = self.pax_headers.copy() + + # Test string fields for values that exceed the field length or cannot + # be represented in ASCII encoding. + for name, hname, length in ( + ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), + ("uname", "uname", 32), ("gname", "gname", 32)): + + if hname in pax_headers: + # The pax header has priority. + continue + + val = info[name].decode(encoding, errors) + + # Try to encode the string as ASCII. + try: + val.encode("ascii") + except UnicodeEncodeError: + pax_headers[hname] = val + continue + + if len(info[name]) > length: + pax_headers[hname] = val + + # Test number fields for values that exceed the field limit or values + # that like to be stored as float. + for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): + if name in pax_headers: + # The pax header has priority. Avoid overflow. + info[name] = 0 + continue + + val = info[name] + if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): + pax_headers[name] = unicode(val) + info[name] = 0 + + # Create a pax extended header if necessary. + if pax_headers: + buf = self._create_pax_generic_header(pax_headers) + else: + buf = "" + + return buf + self._create_header(info, USTAR_FORMAT) + + @classmethod + def create_pax_global_header(cls, pax_headers): + """Return the object as a pax global header block sequence. + """ + return cls._create_pax_generic_header(pax_headers, type=XGLTYPE) + + def _posix_split_name(self, name): + """Split a name longer than 100 chars into a prefix + and a name part. + """ + prefix = name[:LENGTH_PREFIX + 1] + while prefix and prefix[-1] != "/": + prefix = prefix[:-1] + + name = name[len(prefix):] + prefix = prefix[:-1] + + if not prefix or len(name) > LENGTH_NAME: + raise ValueError("name is too long") + return prefix, name + + @staticmethod + def _create_header(info, format): + """Return a header block. info is a dictionary with file + information, format must be one of the *_FORMAT constants. + """ + parts = [ + stn(info.get("name", ""), 100), + itn(info.get("mode", 0) & 07777, 8, format), + itn(info.get("uid", 0), 8, format), + itn(info.get("gid", 0), 8, format), + itn(info.get("size", 0), 12, format), + itn(info.get("mtime", 0), 12, format), + " ", # checksum field + info.get("type", REGTYPE), + stn(info.get("linkname", ""), 100), + stn(info.get("magic", POSIX_MAGIC), 8), + stn(info.get("uname", ""), 32), + stn(info.get("gname", ""), 32), + itn(info.get("devmajor", 0), 8, format), + itn(info.get("devminor", 0), 8, format), + stn(info.get("prefix", ""), 155) + ] + + buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts)) + chksum = calc_chksums(buf[-BLOCKSIZE:])[0] + buf = buf[:-364] + "%06o\0" % chksum + buf[-357:] + return buf + + @staticmethod + def _create_payload(payload): + """Return the string payload filled with zero bytes + up to the next 512 byte border. + """ + blocks, remainder = divmod(len(payload), BLOCKSIZE) + if remainder > 0: + payload += (BLOCKSIZE - remainder) * NUL + return payload + + @classmethod + def _create_gnu_long_header(cls, name, type): + """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence + for name. + """ + name += NUL + + info = {} + info["name"] = "././@LongLink" + info["type"] = type + info["size"] = len(name) + info["magic"] = GNU_MAGIC + + # create extended header + name blocks. + return cls._create_header(info, USTAR_FORMAT) + \ + cls._create_payload(name) + + @classmethod + def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE): + """Return a POSIX.1-2001 extended or global header sequence + that contains a list of keyword, value pairs. The values + must be unicode objects. + """ + records = [] + for keyword, value in pax_headers.iteritems(): + keyword = keyword.encode("utf8") + value = value.encode("utf8") + l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' + n = p = 0 + while True: + n = l + len(str(p)) + if n == p: + break + p = n + records.append("%d %s=%s\n" % (p, keyword, value)) + records = "".join(records) + + # We use a hardcoded "././@PaxHeader" name like star does + # instead of the one that POSIX recommends. + info = {} + info["name"] = "././@PaxHeader" + info["type"] = type + info["size"] = len(records) + info["magic"] = POSIX_MAGIC + + # Create pax header + record blocks. + return cls._create_header(info, USTAR_FORMAT) + \ + cls._create_payload(records) + + @classmethod + def frombuf(cls, buf): + """Construct a TarInfo object from a 512 byte string buffer. + """ + if len(buf) == 0: + raise EmptyHeaderError("empty header") + if len(buf) != BLOCKSIZE: + raise TruncatedHeaderError("truncated header") + if buf.count(NUL) == BLOCKSIZE: + raise EOFHeaderError("end of file header") + + chksum = nti(buf[148:156]) + if chksum not in calc_chksums(buf): + raise InvalidHeaderError("bad checksum") + + obj = cls() + obj.buf = buf + obj.name = nts(buf[0:100]) + obj.mode = nti(buf[100:108]) + obj.uid = nti(buf[108:116]) + obj.gid = nti(buf[116:124]) + obj.size = nti(buf[124:136]) + obj.mtime = nti(buf[136:148]) + obj.chksum = chksum + obj.type = buf[156:157] + obj.linkname = nts(buf[157:257]) + obj.uname = nts(buf[265:297]) + obj.gname = nts(buf[297:329]) + obj.devmajor = nti(buf[329:337]) + obj.devminor = nti(buf[337:345]) + prefix = nts(buf[345:500]) + + # Old V7 tar format represents a directory as a regular + # file with a trailing slash. + if obj.type == AREGTYPE and obj.name.endswith("/"): + obj.type = DIRTYPE + + # Remove redundant slashes from directories. + if obj.isdir(): + obj.name = obj.name.rstrip("/") + + # Reconstruct a ustar longname. + if prefix and obj.type not in GNU_TYPES: + obj.name = prefix + "/" + obj.name + return obj + + @classmethod + def fromtarfile(cls, tarfile): + """Return the next TarInfo object from TarFile object + tarfile. + """ + buf = tarfile.fileobj.read(BLOCKSIZE) + obj = cls.frombuf(buf) + obj.offset = tarfile.fileobj.tell() - BLOCKSIZE + return obj._proc_member(tarfile) + + #-------------------------------------------------------------------------- + # The following are methods that are called depending on the type of a + # member. The entry point is _proc_member() which can be overridden in a + # subclass to add custom _proc_*() methods. A _proc_*() method MUST + # implement the following + # operations: + # 1. Set self.offset_data to the position where the data blocks begin, + # if there is data that follows. + # 2. Set tarfile.offset to the position where the next member's header will + # begin. + # 3. Return self or another valid TarInfo object. + def _proc_member(self, tarfile): + """Choose the right processing method depending on + the type and call it. + """ + if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): + return self._proc_gnulong(tarfile) + elif self.type == GNUTYPE_SPARSE: + return self._proc_sparse(tarfile) + elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): + return self._proc_pax(tarfile) + else: + return self._proc_builtin(tarfile) + + def _proc_builtin(self, tarfile): + """Process a builtin type or an unknown type which + will be treated as a regular file. + """ + self.offset_data = tarfile.fileobj.tell() + offset = self.offset_data + if self.isreg() or self.type not in SUPPORTED_TYPES: + # Skip the following data blocks. + offset += self._block(self.size) + tarfile.offset = offset + + # Patch the TarInfo object with saved global + # header information. + self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) + + return self + + def _proc_gnulong(self, tarfile): + """Process the blocks that hold a GNU longname + or longlink member. + """ + buf = tarfile.fileobj.read(self._block(self.size)) + + # Fetch the next header and process it. + try: + next = self.fromtarfile(tarfile) + except HeaderError: + raise SubsequentHeaderError("missing or bad subsequent header") + + # Patch the TarInfo object from the next header with + # the longname information. + next.offset = self.offset + if self.type == GNUTYPE_LONGNAME: + next.name = nts(buf) + elif self.type == GNUTYPE_LONGLINK: + next.linkname = nts(buf) + + return next + + def _proc_sparse(self, tarfile): + """Process a GNU sparse header plus extra headers. + """ + buf = self.buf + sp = _ringbuffer() + pos = 386 + lastpos = 0L + realpos = 0L + # There are 4 possible sparse structs in the + # first header. + for i in xrange(4): + try: + offset = nti(buf[pos:pos + 12]) + numbytes = nti(buf[pos + 12:pos + 24]) + except ValueError: + break + if offset > lastpos: + sp.append(_hole(lastpos, offset - lastpos)) + sp.append(_data(offset, numbytes, realpos)) + realpos += numbytes + lastpos = offset + numbytes + pos += 24 + + isextended = ord(buf[482]) + origsize = nti(buf[483:495]) + + # If the isextended flag is given, + # there are extra headers to process. + while isextended == 1: + buf = tarfile.fileobj.read(BLOCKSIZE) + pos = 0 + for i in xrange(21): + try: + offset = nti(buf[pos:pos + 12]) + numbytes = nti(buf[pos + 12:pos + 24]) + except ValueError: + break + if offset > lastpos: + sp.append(_hole(lastpos, offset - lastpos)) + sp.append(_data(offset, numbytes, realpos)) + realpos += numbytes + lastpos = offset + numbytes + pos += 24 + isextended = ord(buf[504]) + + if lastpos < origsize: + sp.append(_hole(lastpos, origsize - lastpos)) + + self.sparse = sp + + self.offset_data = tarfile.fileobj.tell() + tarfile.offset = self.offset_data + self._block(self.size) + self.size = origsize + + return self + + def _proc_pax(self, tarfile): + """Process an extended or global header as described in + POSIX.1-2001. + """ + # Read the header information. + buf = tarfile.fileobj.read(self._block(self.size)) + + # A pax header stores supplemental information for either + # the following file (extended) or all following files + # (global). + if self.type == XGLTYPE: + pax_headers = tarfile.pax_headers + else: + pax_headers = tarfile.pax_headers.copy() + + # Parse pax header information. A record looks like that: + # "%d %s=%s\n" % (length, keyword, value). length is the size + # of the complete record including the length field itself and + # the newline. keyword and value are both UTF-8 encoded strings. + regex = re.compile(r"(\d+) ([^=]+)=", re.U) + pos = 0 + while True: + match = regex.match(buf, pos) + if not match: + break + + length, keyword = match.groups() + length = int(length) + value = buf[match.end(2) + 1:match.start(1) + length - 1] + + keyword = keyword.decode("utf8") + value = value.decode("utf8") + + pax_headers[keyword] = value + pos += length + + # Fetch the next header. + try: + next = self.fromtarfile(tarfile) + except HeaderError: + raise SubsequentHeaderError("missing or bad subsequent header") + + if self.type in (XHDTYPE, SOLARIS_XHDTYPE): + # Patch the TarInfo object with the extended header info. + next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) + next.offset = self.offset + + if "size" in pax_headers: + # If the extended header replaces the size field, + # we need to recalculate the offset where the next + # header starts. + offset = next.offset_data + if next.isreg() or next.type not in SUPPORTED_TYPES: + offset += next._block(next.size) + tarfile.offset = offset + + return next + + def _apply_pax_info(self, pax_headers, encoding, errors): + """Replace fields with supplemental information from a previous + pax extended or global header. + """ + for keyword, value in pax_headers.iteritems(): + if keyword not in PAX_FIELDS: + continue + + if keyword == "path": + value = value.rstrip("/") + + if keyword in PAX_NUMBER_FIELDS: + try: + value = PAX_NUMBER_FIELDS[keyword](value) + except ValueError: + value = 0 + else: + value = uts(value, encoding, errors) + + setattr(self, keyword, value) + + self.pax_headers = pax_headers.copy() + + def _block(self, count): + """Round up a byte count by BLOCKSIZE and return it, + e.g. _block(834) => 1024. + """ + blocks, remainder = divmod(count, BLOCKSIZE) + if remainder: + blocks += 1 + return blocks * BLOCKSIZE + + def isreg(self): + return self.type in REGULAR_TYPES + def isfile(self): + return self.isreg() + def isdir(self): + return self.type == DIRTYPE + def issym(self): + return self.type == SYMTYPE + def islnk(self): + return self.type == LNKTYPE + def ischr(self): + return self.type == CHRTYPE + def isblk(self): + return self.type == BLKTYPE + def isfifo(self): + return self.type == FIFOTYPE + def issparse(self): + return self.type == GNUTYPE_SPARSE + def isdev(self): + return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) +# class TarInfo + +class TarFile(object): + """The TarFile Class provides an interface to tar archives. + """ + + debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) + + dereference = False # If true, add content of linked file to the + # tar file, else the link. + + ignore_zeros = False # If true, skips empty or invalid blocks and + # continues processing. + + errorlevel = 1 # If 0, fatal errors only appear in debug + # messages (if debug >= 0). If > 0, errors + # are passed to the caller as exceptions. + + format = DEFAULT_FORMAT # The format to use when creating an archive. + + encoding = ENCODING # Encoding for 8-bit character strings. + + errors = None # Error handler for unicode conversion. + + tarinfo = TarInfo # The default TarInfo class to use. + + fileobject = ExFileObject # The default ExFileObject class to use. + + def __init__(self, name=None, mode="r", fileobj=None, format=None, + tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, + errors=None, pax_headers=None, debug=None, errorlevel=None): + """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to + read from an existing archive, 'a' to append data to an existing + file or 'w' to create a new file overwriting an existing one. `mode' + defaults to 'r'. + If `fileobj' is given, it is used for reading or writing data. If it + can be determined, `mode' is overridden by `fileobj's mode. + `fileobj' is not closed, when TarFile is closed. + """ + modes = {"r": "rb", "a": "r+b", "w": "wb"} + if mode not in modes: + raise ValueError("mode must be 'r', 'a' or 'w'") + self.mode = mode + self._mode = modes[mode] + + if not fileobj: + if self.mode == "a" and not os.path.exists(name): + # Create nonexistent files in append mode. + self.mode = "w" + self._mode = "wb" + fileobj = bltn_open(name, self._mode) + self._extfileobj = False + else: + if name is None and hasattr(fileobj, "name"): + name = fileobj.name + if hasattr(fileobj, "mode"): + self._mode = fileobj.mode + self._extfileobj = True + self.name = os.path.abspath(name) if name else None + self.fileobj = fileobj + + # Init attributes. + if format is not None: + self.format = format + if tarinfo is not None: + self.tarinfo = tarinfo + if dereference is not None: + self.dereference = dereference + if ignore_zeros is not None: + self.ignore_zeros = ignore_zeros + if encoding is not None: + self.encoding = encoding + + if errors is not None: + self.errors = errors + elif mode == "r": + self.errors = "utf-8" + else: + self.errors = "strict" + + if pax_headers is not None and self.format == PAX_FORMAT: + self.pax_headers = pax_headers + else: + self.pax_headers = {} + + if debug is not None: + self.debug = debug + if errorlevel is not None: + self.errorlevel = errorlevel + + # Init datastructures. + self.closed = False + self.members = [] # list of members as TarInfo objects + self._loaded = False # flag if all members have been read + self.offset = self.fileobj.tell() + # current position in the archive file + self.inodes = {} # dictionary caching the inodes of + # archive members already added + + try: + if self.mode == "r": + self.firstmember = None + self.firstmember = self.next() + + if self.mode == "a": + # Move to the end of the archive, + # before the first empty block. + while True: + self.fileobj.seek(self.offset) + try: + tarinfo = self.tarinfo.fromtarfile(self) + self.members.append(tarinfo) + except EOFHeaderError: + self.fileobj.seek(self.offset) + break + except HeaderError, e: + raise ReadError(str(e)) + + if self.mode in "aw": + self._loaded = True + + if self.pax_headers: + buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) + self.fileobj.write(buf) + self.offset += len(buf) + except: + if not self._extfileobj: + self.fileobj.close() + self.closed = True + raise + + def _getposix(self): + return self.format == USTAR_FORMAT + def _setposix(self, value): + import warnings + warnings.warn("use the format attribute instead", DeprecationWarning, + 2) + if value: + self.format = USTAR_FORMAT + else: + self.format = GNU_FORMAT + posix = property(_getposix, _setposix) + + #-------------------------------------------------------------------------- + # Below are the classmethods which act as alternate constructors to the + # TarFile class. The open() method is the only one that is needed for + # public use; it is the "super"-constructor and is able to select an + # adequate "sub"-constructor for a particular compression using the mapping + # from OPEN_METH. + # + # This concept allows one to subclass TarFile without losing the comfort of + # the super-constructor. A sub-constructor is registered and made available + # by adding it to the mapping in OPEN_METH. + + @classmethod + def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): + """Open a tar archive for reading, writing or appending. Return + an appropriate TarFile class. + + mode: + 'r' or 'r:*' open for reading with transparent compression + 'r:' open for reading exclusively uncompressed + 'r:gz' open for reading with gzip compression + 'r:bz2' open for reading with bzip2 compression + 'a' or 'a:' open for appending, creating the file if necessary + 'w' or 'w:' open for writing without compression + 'w:gz' open for writing with gzip compression + 'w:bz2' open for writing with bzip2 compression + + 'r|*' open a stream of tar blocks with transparent compression + 'r|' open an uncompressed stream of tar blocks for reading + 'r|gz' open a gzip compressed stream of tar blocks + 'r|bz2' open a bzip2 compressed stream of tar blocks + 'w|' open an uncompressed stream for writing + 'w|gz' open a gzip compressed stream for writing + 'w|bz2' open a bzip2 compressed stream for writing + """ + + if not name and not fileobj: + raise ValueError("nothing to open") + + if mode in ("r", "r:*"): + # Find out which *open() is appropriate for opening the file. + for comptype in cls.OPEN_METH: + func = getattr(cls, cls.OPEN_METH[comptype]) + if fileobj is not None: + saved_pos = fileobj.tell() + try: + return func(name, "r", fileobj, **kwargs) + except (ReadError, CompressionError), e: + if fileobj is not None: + fileobj.seek(saved_pos) + continue + raise ReadError("file could not be opened successfully") + + elif ":" in mode: + filemode, comptype = mode.split(":", 1) + filemode = filemode or "r" + comptype = comptype or "tar" + + # Select the *open() function according to + # given compression. + if comptype in cls.OPEN_METH: + func = getattr(cls, cls.OPEN_METH[comptype]) + else: + raise CompressionError("unknown compression type %r" % comptype) + return func(name, filemode, fileobj, **kwargs) + + elif "|" in mode: + filemode, comptype = mode.split("|", 1) + filemode = filemode or "r" + comptype = comptype or "tar" + + if filemode not in ("r", "w"): + raise ValueError("mode must be 'r' or 'w'") + + stream = _Stream(name, filemode, comptype, fileobj, bufsize) + try: + t = cls(name, filemode, stream, **kwargs) + except: + stream.close() + raise + t._extfileobj = False + return t + + elif mode in ("a", "w"): + return cls.taropen(name, mode, fileobj, **kwargs) + + raise ValueError("undiscernible mode") + + @classmethod + def taropen(cls, name, mode="r", fileobj=None, **kwargs): + """Open uncompressed tar archive name for reading or writing. + """ + if mode not in ("r", "a", "w"): + raise ValueError("mode must be 'r', 'a' or 'w'") + return cls(name, mode, fileobj, **kwargs) + + @classmethod + def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): + """Open gzip compressed tar archive name for reading or writing. + Appending is not allowed. + """ + if mode not in ("r", "w"): + raise ValueError("mode must be 'r' or 'w'") + + try: + import gzip + gzip.GzipFile + except (ImportError, AttributeError): + raise CompressionError("gzip module is not available") + + try: + fileobj = gzip.GzipFile(name, mode, compresslevel, fileobj) + except OSError: + if fileobj is not None and mode == 'r': + raise ReadError("not a gzip file") + raise + + try: + t = cls.taropen(name, mode, fileobj, **kwargs) + except IOError: + fileobj.close() + if mode == 'r': + raise ReadError("not a gzip file") + raise + except: + fileobj.close() + raise + t._extfileobj = False + return t + + @classmethod + def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): + """Open bzip2 compressed tar archive name for reading or writing. + Appending is not allowed. + """ + if mode not in ("r", "w"): + raise ValueError("mode must be 'r' or 'w'.") + + try: + import bz2 + except ImportError: + raise CompressionError("bz2 module is not available") + + if fileobj is not None: + fileobj = _BZ2Proxy(fileobj, mode) + else: + fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) + + try: + t = cls.taropen(name, mode, fileobj, **kwargs) + except (IOError, EOFError): + fileobj.close() + if mode == 'r': + raise ReadError("not a bzip2 file") + raise + except: + fileobj.close() + raise + t._extfileobj = False + return t + + # All *open() methods are registered here. + OPEN_METH = { + "tar": "taropen", # uncompressed tar + "gz": "gzopen", # gzip compressed tar + "bz2": "bz2open" # bzip2 compressed tar + } + + #-------------------------------------------------------------------------- + # The public methods which TarFile provides: + + def close(self): + """Close the TarFile. In write-mode, two finishing zero blocks are + appended to the archive. + """ + if self.closed: + return + + self.closed = True + try: + if self.mode in "aw": + self.fileobj.write(NUL * (BLOCKSIZE * 2)) + self.offset += (BLOCKSIZE * 2) + # fill up the end with zero-blocks + # (like option -b20 for tar does) + blocks, remainder = divmod(self.offset, RECORDSIZE) + if remainder > 0: + self.fileobj.write(NUL * (RECORDSIZE - remainder)) + finally: + if not self._extfileobj: + self.fileobj.close() + + def getmember(self, name): + """Return a TarInfo object for member `name'. If `name' can not be + found in the archive, KeyError is raised. If a member occurs more + than once in the archive, its last occurrence is assumed to be the + most up-to-date version. + """ + tarinfo = self._getmember(name) + if tarinfo is None: + raise KeyError("filename %r not found" % name) + return tarinfo + + def getmembers(self): + """Return the members of the archive as a list of TarInfo objects. The + list has the same order as the members in the archive. + """ + self._check() + if not self._loaded: # if we want to obtain a list of + self._load() # all members, we first have to + # scan the whole archive. + return self.members + + def getnames(self): + """Return the members of the archive as a list of their names. It has + the same order as the list returned by getmembers(). + """ + return [tarinfo.name for tarinfo in self.getmembers()] + + def gettarinfo(self, name=None, arcname=None, fileobj=None): + """Create a TarInfo object from the result of os.stat or equivalent + on an existing file. The file is either named by `name', or + specified as a file object `fileobj' with a file descriptor. If + given, `arcname' specifies an alternative name for the file in the + archive, otherwise, the name is taken from the 'name' attribute of + 'fileobj', or the 'name' argument. + """ + self._check("aw") + + # When fileobj is given, replace name by + # fileobj's real name. + if fileobj is not None: + name = fileobj.name + + # Building the name of the member in the archive. + # Backward slashes are converted to forward slashes, + # Absolute paths are turned to relative paths. + if arcname is None: + arcname = name + drv, arcname = os.path.splitdrive(arcname) + arcname = arcname.replace(os.sep, "/") + arcname = arcname.lstrip("/") + + # Now, fill the TarInfo object with + # information specific for the file. + tarinfo = self.tarinfo() + tarinfo.tarfile = self # Not needed + + # Use os.stat or os.lstat, depending on platform + # and if symlinks shall be resolved. + if fileobj is None: + if hasattr(os, "lstat") and not self.dereference: + statres = os.lstat(name) + else: + statres = os.stat(name) + else: + statres = os.fstat(fileobj.fileno()) + linkname = "" + + stmd = statres.st_mode + if stat.S_ISREG(stmd): + inode = (statres.st_ino, statres.st_dev) + if not self.dereference and statres.st_nlink > 1 and \ + inode in self.inodes and arcname != self.inodes[inode]: + # Is it a hardlink to an already + # archived file? + type = LNKTYPE + linkname = self.inodes[inode] + else: + # The inode is added only if its valid. + # For win32 it is always 0. + type = REGTYPE + if inode[0]: + self.inodes[inode] = arcname + elif stat.S_ISDIR(stmd): + type = DIRTYPE + elif stat.S_ISFIFO(stmd): + type = FIFOTYPE + elif stat.S_ISLNK(stmd): + type = SYMTYPE + linkname = os.readlink(name) + elif stat.S_ISCHR(stmd): + type = CHRTYPE + elif stat.S_ISBLK(stmd): + type = BLKTYPE + else: + return None + + # Fill the TarInfo object with all + # information we can get. + tarinfo.name = arcname + tarinfo.mode = stmd + tarinfo.uid = statres.st_uid + tarinfo.gid = statres.st_gid + if type == REGTYPE: + tarinfo.size = statres.st_size + else: + tarinfo.size = 0L + tarinfo.mtime = statres.st_mtime + tarinfo.type = type + tarinfo.linkname = linkname + if pwd: + try: + tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] + except KeyError: + pass + if grp: + try: + tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] + except KeyError: + pass + + if type in (CHRTYPE, BLKTYPE): + if hasattr(os, "major") and hasattr(os, "minor"): + tarinfo.devmajor = os.major(statres.st_rdev) + tarinfo.devminor = os.minor(statres.st_rdev) + return tarinfo + + def list(self, verbose=True): + """Print a table of contents to sys.stdout. If `verbose' is False, only + the names of the members are printed. If it is True, an `ls -l'-like + output is produced. + """ + self._check() + + for tarinfo in self: + if verbose: + print filemode(tarinfo.mode), + print "%s/%s" % (tarinfo.uname or tarinfo.uid, + tarinfo.gname or tarinfo.gid), + if tarinfo.ischr() or tarinfo.isblk(): + print "%10s" % ("%d,%d" \ + % (tarinfo.devmajor, tarinfo.devminor)), + else: + print "%10d" % tarinfo.size, + print "%d-%02d-%02d %02d:%02d:%02d" \ + % time.localtime(tarinfo.mtime)[:6], + + print tarinfo.name + ("/" if tarinfo.isdir() else ""), + + if verbose: + if tarinfo.issym(): + print "->", tarinfo.linkname, + if tarinfo.islnk(): + print "link to", tarinfo.linkname, + print + + def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): + """Add the file `name' to the archive. `name' may be any type of file + (directory, fifo, symbolic link, etc.). If given, `arcname' + specifies an alternative name for the file in the archive. + Directories are added recursively by default. This can be avoided by + setting `recursive' to False. `exclude' is a function that should + return True for each filename to be excluded. `filter' is a function + that expects a TarInfo object argument and returns the changed + TarInfo object, if it returns None the TarInfo object will be + excluded from the archive. + """ + self._check("aw") + + if arcname is None: + arcname = name + + # Exclude pathnames. + if exclude is not None: + import warnings + warnings.warn("use the filter argument instead", + DeprecationWarning, 2) + if exclude(name): + self._dbg(2, "tarfile: Excluded %r" % name) + return + + # Skip if somebody tries to archive the archive... + if self.name is not None and os.path.abspath(name) == self.name: + self._dbg(2, "tarfile: Skipped %r" % name) + return + + self._dbg(1, name) + + # Create a TarInfo object from the file. + tarinfo = self.gettarinfo(name, arcname) + + if tarinfo is None: + self._dbg(1, "tarfile: Unsupported type %r" % name) + return + + # Change or exclude the TarInfo object. + if filter is not None: + tarinfo = filter(tarinfo) + if tarinfo is None: + self._dbg(2, "tarfile: Excluded %r" % name) + return + + # Append the tar header and data to the archive. + if tarinfo.isreg(): + with bltn_open(name, "rb") as f: + self.addfile(tarinfo, f) + + elif tarinfo.isdir(): + self.addfile(tarinfo) + if recursive: + for f in os.listdir(name): + self.add(os.path.join(name, f), os.path.join(arcname, f), + recursive, exclude, filter) + + else: + self.addfile(tarinfo) + + def addfile(self, tarinfo, fileobj=None): + """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is + given, tarinfo.size bytes are read from it and added to the archive. + You can create TarInfo objects directly, or by using gettarinfo(). + On Windows platforms, `fileobj' should always be opened with mode + 'rb' to avoid irritation about the file size. + """ + self._check("aw") + + tarinfo = copy.copy(tarinfo) + + buf = tarinfo.tobuf(self.format, self.encoding, self.errors) + self.fileobj.write(buf) + self.offset += len(buf) + + # If there's data to follow, append it. + if fileobj is not None: + copyfileobj(fileobj, self.fileobj, tarinfo.size) + blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) + if remainder > 0: + self.fileobj.write(NUL * (BLOCKSIZE - remainder)) + blocks += 1 + self.offset += blocks * BLOCKSIZE + + self.members.append(tarinfo) + + def extractall(self, path=".", members=None): + """Extract all members from the archive to the current working + directory and set owner, modification time and permissions on + directories afterwards. `path' specifies a different directory + to extract to. `members' is optional and must be a subset of the + list returned by getmembers(). + """ + directories = [] + + if members is None: + members = self + + for tarinfo in members: + if tarinfo.isdir(): + # Extract directories with a safe mode. + directories.append(tarinfo) + tarinfo = copy.copy(tarinfo) + tarinfo.mode = 0700 + self.extract(tarinfo, path) + + # Reverse sort directories. + directories.sort(key=operator.attrgetter('name')) + directories.reverse() + + # Set correct owner, mtime and filemode on directories. + for tarinfo in directories: + dirpath = os.path.join(path, tarinfo.name) + try: + self.chown(tarinfo, dirpath) + self.utime(tarinfo, dirpath) + self.chmod(tarinfo, dirpath) + except ExtractError, e: + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + + def extract(self, member, path=""): + """Extract a member from the archive to the current working directory, + using its full name. Its file information is extracted as accurately + as possible. `member' may be a filename or a TarInfo object. You can + specify a different directory using `path'. + """ + self._check("r") + + if isinstance(member, basestring): + tarinfo = self.getmember(member) + else: + tarinfo = member + + # Prepare the link target for makelink(). + if tarinfo.islnk(): + tarinfo._link_target = os.path.join(path, tarinfo.linkname) + + try: + self._extract_member(tarinfo, os.path.join(path, tarinfo.name)) + except EnvironmentError, e: + if self.errorlevel > 0: + raise + else: + if e.filename is None: + self._dbg(1, "tarfile: %s" % e.strerror) + else: + self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) + except ExtractError, e: + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + + def extractfile(self, member): + """Extract a member from the archive as a file object. `member' may be + a filename or a TarInfo object. If `member' is a regular file, a + file-like object is returned. If `member' is a link, a file-like + object is constructed from the link's target. If `member' is none of + the above, None is returned. + The file-like object is read-only and provides the following + methods: read(), readline(), readlines(), seek() and tell() + """ + self._check("r") + + if isinstance(member, basestring): + tarinfo = self.getmember(member) + else: + tarinfo = member + + if tarinfo.isreg(): + return self.fileobject(self, tarinfo) + + elif tarinfo.type not in SUPPORTED_TYPES: + # If a member's type is unknown, it is treated as a + # regular file. + return self.fileobject(self, tarinfo) + + elif tarinfo.islnk() or tarinfo.issym(): + if isinstance(self.fileobj, _Stream): + # A small but ugly workaround for the case that someone tries + # to extract a (sym)link as a file-object from a non-seekable + # stream of tar blocks. + raise StreamError("cannot extract (sym)link as file object") + else: + # A (sym)link's file object is its target's file object. + return self.extractfile(self._find_link_target(tarinfo)) + else: + # If there's no data associated with the member (directory, chrdev, + # blkdev, etc.), return None instead of a file object. + return None + + def _extract_member(self, tarinfo, targetpath): + """Extract the TarInfo object tarinfo to a physical + file called targetpath. + """ + # Fetch the TarInfo object for the given name + # and build the destination pathname, replacing + # forward slashes to platform specific separators. + targetpath = targetpath.rstrip("/") + targetpath = targetpath.replace("/", os.sep) + + # Create all upper directories. + upperdirs = os.path.dirname(targetpath) + if upperdirs and not os.path.exists(upperdirs): + # Create directories that are not part of the archive with + # default permissions. + os.makedirs(upperdirs) + + if tarinfo.islnk() or tarinfo.issym(): + self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) + else: + self._dbg(1, tarinfo.name) + + if tarinfo.isreg(): + self.makefile(tarinfo, targetpath) + elif tarinfo.isdir(): + self.makedir(tarinfo, targetpath) + elif tarinfo.isfifo(): + self.makefifo(tarinfo, targetpath) + elif tarinfo.ischr() or tarinfo.isblk(): + self.makedev(tarinfo, targetpath) + elif tarinfo.islnk() or tarinfo.issym(): + self.makelink(tarinfo, targetpath) + elif tarinfo.type not in SUPPORTED_TYPES: + self.makeunknown(tarinfo, targetpath) + else: + self.makefile(tarinfo, targetpath) + + self.chown(tarinfo, targetpath) + if not tarinfo.issym(): + self.chmod(tarinfo, targetpath) + self.utime(tarinfo, targetpath) + + #-------------------------------------------------------------------------- + # Below are the different file methods. They are called via + # _extract_member() when extract() is called. They can be replaced in a + # subclass to implement other functionality. + + def makedir(self, tarinfo, targetpath): + """Make a directory called targetpath. + """ + try: + # Use a safe mode for the directory, the real mode is set + # later in _extract_member(). + os.mkdir(targetpath, 0700) + except EnvironmentError, e: + if e.errno != errno.EEXIST: + raise + + def makefile(self, tarinfo, targetpath): + """Make a file called targetpath. + """ + source = self.extractfile(tarinfo) + try: + with bltn_open(targetpath, "wb") as target: + copyfileobj(source, target) + finally: + source.close() + + def makeunknown(self, tarinfo, targetpath): + """Make a file from a TarInfo object with an unknown type + at targetpath. + """ + self.makefile(tarinfo, targetpath) + self._dbg(1, "tarfile: Unknown file type %r, " \ + "extracted as regular file." % tarinfo.type) + + def makefifo(self, tarinfo, targetpath): + """Make a fifo called targetpath. + """ + if hasattr(os, "mkfifo"): + os.mkfifo(targetpath) + else: + raise ExtractError("fifo not supported by system") + + def makedev(self, tarinfo, targetpath): + """Make a character or block device called targetpath. + """ + if not hasattr(os, "mknod") or not hasattr(os, "makedev"): + raise ExtractError("special devices not supported by system") + + mode = tarinfo.mode + if tarinfo.isblk(): + mode |= stat.S_IFBLK + else: + mode |= stat.S_IFCHR + + os.mknod(targetpath, mode, + os.makedev(tarinfo.devmajor, tarinfo.devminor)) + + def makelink(self, tarinfo, targetpath): + """Make a (symbolic) link called targetpath. If it cannot be created + (platform limitation), we try to make a copy of the referenced file + instead of a link. + """ + if hasattr(os, "symlink") and hasattr(os, "link"): + # For systems that support symbolic and hard links. + if tarinfo.issym(): + if os.path.lexists(targetpath): + os.unlink(targetpath) + os.symlink(tarinfo.linkname, targetpath) + else: + # See extract(). + if os.path.exists(tarinfo._link_target): + if os.path.lexists(targetpath): + os.unlink(targetpath) + os.link(tarinfo._link_target, targetpath) + else: + self._extract_member(self._find_link_target(tarinfo), targetpath) + else: + try: + self._extract_member(self._find_link_target(tarinfo), targetpath) + except KeyError: + raise ExtractError("unable to resolve link inside archive") + + def chown(self, tarinfo, targetpath): + """Set owner of targetpath according to tarinfo. + """ + if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: + # We have to be root to do so. + try: + g = grp.getgrnam(tarinfo.gname)[2] + except KeyError: + g = tarinfo.gid + try: + u = pwd.getpwnam(tarinfo.uname)[2] + except KeyError: + u = tarinfo.uid + try: + if tarinfo.issym() and hasattr(os, "lchown"): + os.lchown(targetpath, u, g) + else: + if sys.platform != "os2emx": + os.chown(targetpath, u, g) + except EnvironmentError, e: + raise ExtractError("could not change owner") + + def chmod(self, tarinfo, targetpath): + """Set file permissions of targetpath according to tarinfo. + """ + if hasattr(os, 'chmod'): + try: + os.chmod(targetpath, tarinfo.mode) + except EnvironmentError, e: + raise ExtractError("could not change mode") + + def utime(self, tarinfo, targetpath): + """Set modification time of targetpath according to tarinfo. + """ + if not hasattr(os, 'utime'): + return + try: + os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) + except EnvironmentError, e: + raise ExtractError("could not change modification time") + + #-------------------------------------------------------------------------- + def next(self): + """Return the next member of the archive as a TarInfo object, when + TarFile is opened for reading. Return None if there is no more + available. + """ + self._check("ra") + if self.firstmember is not None: + m = self.firstmember + self.firstmember = None + return m + + # Advance the file pointer. + if self.offset != self.fileobj.tell(): + self.fileobj.seek(self.offset - 1) + if not self.fileobj.read(1): + raise ReadError("unexpected end of data") + + # Read the next block. + tarinfo = None + while True: + try: + tarinfo = self.tarinfo.fromtarfile(self) + except EOFHeaderError, e: + if self.ignore_zeros: + self._dbg(2, "0x%X: %s" % (self.offset, e)) + self.offset += BLOCKSIZE + continue + except InvalidHeaderError, e: + if self.ignore_zeros: + self._dbg(2, "0x%X: %s" % (self.offset, e)) + self.offset += BLOCKSIZE + continue + elif self.offset == 0: + raise ReadError(str(e)) + except EmptyHeaderError: + if self.offset == 0: + raise ReadError("empty file") + except TruncatedHeaderError, e: + if self.offset == 0: + raise ReadError(str(e)) + except SubsequentHeaderError, e: + raise ReadError(str(e)) + break + + if tarinfo is not None: + self.members.append(tarinfo) + else: + self._loaded = True + + return tarinfo + + #-------------------------------------------------------------------------- + # Little helper methods: + + def _getmember(self, name, tarinfo=None, normalize=False): + """Find an archive member by name from bottom to top. + If tarinfo is given, it is used as the starting point. + """ + # Ensure that all members have been loaded. + members = self.getmembers() + + # Limit the member search list up to tarinfo. + if tarinfo is not None: + members = members[:members.index(tarinfo)] + + if normalize: + name = os.path.normpath(name) + + for member in reversed(members): + if normalize: + member_name = os.path.normpath(member.name) + else: + member_name = member.name + + if name == member_name: + return member + + def _load(self): + """Read through the entire archive file and look for readable + members. + """ + while True: + tarinfo = self.next() + if tarinfo is None: + break + self._loaded = True + + def _check(self, mode=None): + """Check if TarFile is still open, and if the operation's mode + corresponds to TarFile's mode. + """ + if self.closed: + raise IOError("%s is closed" % self.__class__.__name__) + if mode is not None and self.mode not in mode: + raise IOError("bad operation for mode %r" % self.mode) + + def _find_link_target(self, tarinfo): + """Find the target member of a symlink or hardlink member in the + archive. + """ + if tarinfo.issym(): + # Always search the entire archive. + linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname))) + limit = None + else: + # Search the archive before the link, because a hard link is + # just a reference to an already archived file. + linkname = tarinfo.linkname + limit = tarinfo + + member = self._getmember(linkname, tarinfo=limit, normalize=True) + if member is None: + raise KeyError("linkname %r not found" % linkname) + return member + + def __iter__(self): + """Provide an iterator object. + """ + if self._loaded: + return iter(self.members) + else: + return TarIter(self) + + def _dbg(self, level, msg): + """Write debugging output to sys.stderr. + """ + if level <= self.debug: + print >> sys.stderr, msg + + def __enter__(self): + self._check() + return self + + def __exit__(self, type, value, traceback): + if type is None: + self.close() + else: + # An exception occurred. We must not call close() because + # it would try to write end-of-archive blocks and padding. + if not self._extfileobj: + self.fileobj.close() + self.closed = True +# class TarFile + +class TarIter: + """Iterator Class. + + for tarinfo in TarFile(...): + suite... + """ + + def __init__(self, tarfile): + """Construct a TarIter object. + """ + self.tarfile = tarfile + self.index = 0 + def __iter__(self): + """Return iterator object. + """ + return self + def next(self): + """Return the next item using TarFile's next() method. + When all members have been read, set TarFile as _loaded. + """ + # Fix for SF #1100429: Under rare circumstances it can + # happen that getmembers() is called during iteration, + # which will cause TarIter to stop prematurely. + + if self.index == 0 and self.tarfile.firstmember is not None: + tarinfo = self.tarfile.next() + elif self.index < len(self.tarfile.members): + tarinfo = self.tarfile.members[self.index] + elif not self.tarfile._loaded: + tarinfo = self.tarfile.next() + if not tarinfo: + self.tarfile._loaded = True + raise StopIteration + else: + raise StopIteration + self.index += 1 + return tarinfo + +# Helper classes for sparse file support +class _section: + """Base class for _data and _hole. + """ + def __init__(self, offset, size): + self.offset = offset + self.size = size + def __contains__(self, offset): + return self.offset <= offset < self.offset + self.size + +class _data(_section): + """Represent a data section in a sparse file. + """ + def __init__(self, offset, size, realpos): + _section.__init__(self, offset, size) + self.realpos = realpos + +class _hole(_section): + """Represent a hole section in a sparse file. + """ + pass + +class _ringbuffer(list): + """Ringbuffer class which increases performance + over a regular list. + """ + def __init__(self): + self.idx = 0 + def find(self, offset): + idx = self.idx + while True: + item = self[idx] + if offset in item: + break + idx += 1 + if idx == len(self): + idx = 0 + if idx == self.idx: + # End of File + return None + self.idx = idx + return item + +#--------------------------------------------- +# zipfile compatible TarFile class +#--------------------------------------------- +TAR_PLAIN = 0 # zipfile.ZIP_STORED +TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED +class TarFileCompat: + """TarFile class compatible with standard module zipfile's + ZipFile class. + """ + def __init__(self, file, mode="r", compression=TAR_PLAIN): + from warnings import warnpy3k + warnpy3k("the TarFileCompat class has been removed in Python 3.0", + stacklevel=2) + if compression == TAR_PLAIN: + self.tarfile = TarFile.taropen(file, mode) + elif compression == TAR_GZIPPED: + self.tarfile = TarFile.gzopen(file, mode) + else: + raise ValueError("unknown compression constant") + if mode[0:1] == "r": + members = self.tarfile.getmembers() + for m in members: + m.filename = m.name + m.file_size = m.size + m.date_time = time.gmtime(m.mtime)[:6] + def namelist(self): + return map(lambda m: m.name, self.infolist()) + def infolist(self): + return filter(lambda m: m.type in REGULAR_TYPES, + self.tarfile.getmembers()) + def printdir(self): + self.tarfile.list() + def testzip(self): + return + def getinfo(self, name): + return self.tarfile.getmember(name) + def read(self, name): + return self.tarfile.extractfile(self.tarfile.getmember(name)).read() + def write(self, filename, arcname=None, compress_type=None): + self.tarfile.add(filename, arcname) + def writestr(self, zinfo, bytes): + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + import calendar + tinfo = TarInfo(zinfo.filename) + tinfo.size = len(bytes) + tinfo.mtime = calendar.timegm(zinfo.date_time) + self.tarfile.addfile(tinfo, StringIO(bytes)) + def close(self): + self.tarfile.close() +#class TarFileCompat + +#-------------------- +# exported functions +#-------------------- +def is_tarfile(name): + """Return True if name points to a tar archive that we + are able to handle, else return False. + """ + try: + t = open(name) + t.close() + return True + except TarError: + return False + +open = TarFile.open