Skip to content

Commit ad177c8

Browse files
committed
[WIP] unicode support
1 parent 4d788a9 commit ad177c8

File tree

4 files changed

+70
-37
lines changed

4 files changed

+70
-37
lines changed

pyrepl/historical_reader.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ def prepare(self):
256256
def get_prompt(self, lineno, cursor_on_line):
257257
if cursor_on_line and self.isearch_direction != ISEARCH_DIRECTION_NONE:
258258
d = 'rf'[self.isearch_direction == ISEARCH_DIRECTION_FORWARDS]
259-
return "(%s-search `%s') "%(d, self.isearch_term)
259+
return u"(%s-search `%s') "%(d, self.isearch_term)
260260
else:
261261
return super(HistoricalReader, self).get_prompt(lineno, cursor_on_line)
262262

pyrepl/reader.py

+29-14
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,27 @@
2020
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
2121

2222
from __future__ import unicode_literals
23+
import sys
2324
import unicodedata
2425
from pyrepl import commands
2526
from pyrepl import input
2627
try:
2728
unicode
29+
def decode(x, enc = sys.stdout.encoding):
30+
if not isinstance(x, unicode):
31+
return unicode(x, enc)
32+
return x
2833
except NameError:
2934
unicode = str
3035
unichr = chr
3136
basestring = bytes, str
37+
decode = lambda x, _ = None: x
38+
39+
40+
def width(c):
41+
return 2 if unicodedata.east_asian_width(c) in "FW" else 1
42+
def wlen(s):
43+
return sum(map(width, s))
3244

3345

3446
def _make_unctrl_map():
@@ -39,8 +51,8 @@ def _make_unctrl_map():
3951
for i in range(32):
4052
c = unichr(i)
4153
uc_map[c] = '^' + unichr(ord('A') + i - 1)
42-
uc_map[b'\t'] = ' ' # display TABs as 4 characters
43-
uc_map[b'\177'] = unicode('^?')
54+
uc_map['\t'] = ' ' # display TABs as 4 characters
55+
uc_map['\177'] = unicode('^?')
4456
for i in range(256):
4557
c = unichr(i)
4658
if c not in uc_map:
@@ -53,7 +65,7 @@ def _my_unctrl(c, u=_make_unctrl_map()):
5365
return u[c]
5466
else:
5567
if unicodedata.category(c).startswith('C'):
56-
return br'\u%04x' % ord(c)
68+
return '\\u%04x' % ord(c)
5769
else:
5870
return c
5971

@@ -75,7 +87,7 @@ def disp_str(buffer, join=''.join, uc=_my_unctrl):
7587
s = [uc(x) for x in buffer]
7688
b = [] # XXX: bytearray
7789
for x in s:
78-
b.append(1)
90+
b.append(width(x[0]))
7991
b.extend([0] * (len(x) - 1))
8092
return join(s), b
8193

@@ -280,7 +292,7 @@ def calc_screen(self):
280292
for mline in self.msg.split("\n"):
281293
screen.append(mline)
282294
screeninfo.append((0, []))
283-
self.lxy = p, ln
295+
# self.lxy = p, ln
284296
prompt = self.get_prompt(ln, ll >= p >= 0)
285297
while '\n' in prompt:
286298
pre_prompt, _, prompt = prompt.partition('\n')
@@ -289,8 +301,8 @@ def calc_screen(self):
289301
p -= ll + 1
290302
prompt, lp = self.process_prompt(prompt)
291303
l, l2 = disp_str(line)
292-
wrapcount = (len(l) + lp) // w
293-
if wrapcount == 0:
304+
wrapcount = (wlen(l) + lp) // w
305+
if 1 or wrapcount == 0: # FIXME
294306
screen.append(prompt + l)
295307
screeninfo.append((lp, l2 + [1]))
296308
else:
@@ -318,7 +330,7 @@ def process_prompt(self, prompt):
318330
is returned with these control characters removed. """
319331

320332
out_prompt = ''
321-
l = len(prompt)
333+
l = wlen(prompt)
322334
pos = 0
323335
while True:
324336
s = prompt.find('\x01', pos)
@@ -420,7 +432,7 @@ def get_prompt(self, lineno, cursor_on_line):
420432
# the object on which str() was called. This ensures that even if the
421433
# same object is used e.g. for ps1 and ps2, str() is called only once.
422434
if res not in self._pscache:
423-
self._pscache[res] = str(res)
435+
self._pscache[res] = decode(res)
424436
return self._pscache[res]
425437

426438
def push_input_trans(self, itrans):
@@ -438,23 +450,26 @@ def pos2xy(self, pos):
438450
if pos == len(self.buffer):
439451
y = len(self.screeninfo) - 1
440452
p, l2 = self.screeninfo[y]
441-
return p + len(l2) - 1, y
453+
return p + sum(l2) + l2.count(0) - 1, y
442454
else:
443455
for p, l2 in self.screeninfo:
444-
l = l2.count(1)
456+
l = len(l2) - l2.count(0)
445457
if l > pos:
446458
break
447459
else:
448460
pos -= l
449461
y += 1
450462
c = 0
451463
i = 0
452-
while c < pos:
453-
c += l2[i]
464+
j = 0
465+
while j < pos:
466+
j += 1 if l2[i] else 0
467+
c += l2[i] or 1
454468
i += 1
455469
while l2[i] == 0:
470+
c += 1
456471
i += 1
457-
return p + i, y
472+
return p + c, y
458473

459474
def insert(self, text):
460475
"""Insert 'text' at the insertion point."""

pyrepl/unix_console.py

+39-21
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import re
2929
import time
3030
import sys
31+
import unicodedata
3132
from fcntl import ioctl
3233
from . import curses
3334
from .fancy_termios import tcgetattr, tcsetattr
@@ -44,6 +45,13 @@ class InvalidTerminal(RuntimeError):
4445
except NameError:
4546
unicode = str
4647

48+
49+
def width(c):
50+
return 2 if unicodedata.east_asian_width(c) in "FW" else 1
51+
def wlen(s):
52+
return sum(map(width, s))
53+
54+
4755
_error = (termios.error, curses.error, InvalidTerminal)
4856

4957
# there are arguments for changing this to "refresh"
@@ -247,46 +255,56 @@ def __write_changed_line(self, y, oldline, newline, px):
247255
# structuring this function are equally painful (I'm trying to
248256
# avoid writing code generators these days...)
249257
x = 0
250-
minlen = min(len(oldline), len(newline))
258+
i = 0
259+
minlen = min(wlen(oldline), wlen(newline))
260+
pi = 0
261+
xx = 0
262+
for c in oldline:
263+
xx += width(c)
264+
pi += 1
265+
if xx >= px: break
251266
#
252267
# reuse the oldline as much as possible, but stop as soon as we
253268
# encounter an ESCAPE, because it might be the start of an escape
254269
# sequence
255-
#XXX unicode check!
256-
while x < minlen and oldline[x] == newline[x] and newline[x] != '\x1b':
257-
x += 1
258-
if oldline[x:] == newline[x+1:] and self.ich1:
270+
while x < minlen and oldline[i] == newline[i] and newline[i] != '\x1b':
271+
x += width(newline[i])
272+
i += 1
273+
if oldline[i:] == newline[i+1:] and self.ich1:
259274
if (y == self.__posxy[1] and x > self.__posxy[0] and
260-
oldline[px:x] == newline[px+1:x+1]):
275+
oldline[pi:i] == newline[pi+1:i+1]):
276+
i = pi
261277
x = px
262278
self.__move(x, y)
263-
self.__write_code(self.ich1)
264-
self.__write(newline[x])
265-
self.__posxy = x + 1, y
266-
elif x < minlen and oldline[x + 1:] == newline[x + 1:]:
279+
cw = width(newline[i])
280+
self.__write_code(cw*self.ich1)
281+
self.__write(newline[i])
282+
self.__posxy = x + cw, y
283+
elif (x < minlen and oldline[i + 1:] == newline[i + 1:]
284+
and width(oldline[i]) == width(newline[i])):
267285
self.__move(x, y)
268-
self.__write(newline[x])
269-
self.__posxy = x + 1, y
270-
elif (self.dch1 and self.ich1 and len(newline) == self.width
271-
and x < len(newline) - 2
272-
and newline[x+1:-1] == oldline[x:-2]):
286+
self.__write(newline[i])
287+
self.__posxy = x + width(newline[i]), y
288+
elif (self.dch1 and self.ich1 and wlen(newline) == self.width
289+
and x < wlen(newline) - 2
290+
and newline[i+1:-1] == oldline[i:-2]):
291+
raise NotImplementedError() # FIXME
273292
self.__hide_cursor()
274293
self.__move(self.width - 2, y)
275294
self.__posxy = self.width - 2, y
276295
self.__write_code(self.dch1)
277296
self.__move(x, y)
278297
self.__write_code(self.ich1)
279-
self.__write(newline[x])
280-
self.__posxy = x + 1, y
298+
self.__write(newline[i])
299+
self.__posxy = x + width(newline[i]), y
281300
else:
282301
self.__hide_cursor()
283302
self.__move(x, y)
284-
if len(oldline) > len(newline):
303+
if wlen(oldline) > wlen(newline):
285304
self.__write_code(self._el)
286-
self.__write(newline[x:])
287-
self.__posxy = len(newline), y
305+
self.__write(newline[i:])
306+
self.__posxy = wlen(newline), y
288307

289-
#XXX: check for unicode mess
290308
if '\x1b' in newline:
291309
# ANSI escape characters are present, so we can't assume
292310
# anything about the position of the cursor. Moving the cursor

testing/infrastructure.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class TestReader(Reader):
6464
__test__ = False
6565

6666
def get_prompt(self, lineno, cursor_on_line):
67-
return ''
67+
return u''
6868

6969
def refresh(self):
7070
Reader.refresh(self)

0 commit comments

Comments
 (0)