Skip to content

Commit 3bf3732

Browse files
committed
First cut at code for extracting translation tasks as XLIFF.
1 parent a374e2e commit 3bf3732

1 file changed

Lines changed: 153 additions & 0 deletions

File tree

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
#lang racket/base
2+
3+
; Generate XLIFF templates for translations
4+
5+
(require racket/match
6+
xml
7+
(only-in racket/list partition range)
8+
(only-in racket/string string-trim)
9+
file/glob
10+
racket/path)
11+
(require "match-xml.rkt")
12+
13+
(struct translations
14+
(localizeds)
15+
#:transparent)
16+
17+
(struct localized
18+
(text language outdated?)
19+
#:transparent)
20+
21+
(define (text-element? content)
22+
(match content
23+
((element _start _stop 'text _attributes _contents) #t)
24+
(else #f)))
25+
26+
(define (collect-translations content)
27+
(match content
28+
((element _start _stop name attributes contents)
29+
(let-values (((texts non-texts)
30+
(partition text-element? contents)))
31+
(if (null? texts)
32+
(apply append (map collect-translations contents))
33+
(cons (translations (map text->localized texts))
34+
(apply append (map collect-translations contents))))))
35+
(else '())))
36+
37+
; returns lang or #f
38+
(define (text->localized xml)
39+
(match xml
40+
((element* text
41+
((xml:lang lang)
42+
(outdated outdated-value))
43+
text)
44+
(localized (string-trim text)
45+
lang
46+
(equal? outdated-value "outdated")))))
47+
48+
49+
(define structure-tags
50+
'(pickQuestion
51+
categoryQuestion
52+
history lg stem pickOptions option explanation
53+
categoryStatements categories category statements statement))
54+
55+
(define cleanup
56+
(eliminate-whitespace structure-tags values))
57+
58+
(define (question-id element)
59+
(match element
60+
((element* pickQuestion ((id id)) ()) id)
61+
((element* categoryQuestion ((id id)) ()) id)))
62+
63+
; returns id, translations
64+
(define (document-collect-translations xml)
65+
(match xml
66+
((document _ element _)
67+
(let ((question (cleanup element)))
68+
(values (question-id question)
69+
(collect-translations question))))))
70+
71+
(define (file-collect-translations filename)
72+
(let ((document (call-with-input-file filename read-xml)))
73+
(document-collect-translations document)))
74+
75+
(define (element% name attributes contents)
76+
(element #f #f name attributes contents))
77+
78+
(define (attribute% name value)
79+
(attribute #f #f name value))
80+
81+
(define (pcdata% text)
82+
(pcdata #f #f text))
83+
84+
(define (partition-translation source-language target-language translation)
85+
(let ((localizeds (translations-localizeds translation)))
86+
(values (findf (lambda (localized) (equal? (localized-language localized) source-language)) localizeds)
87+
(findf (lambda (localized) (equal? (localized-language localized) target-language)) localizeds)
88+
(filter (lambda (localized)
89+
(let ((lang (localized-language localized)))
90+
(and (not (equal? lang source-language))
91+
(not (equal? lang target-language)))))
92+
localizeds))))
93+
94+
(define (text-body localized)
95+
(list (pcdata% (localized-text localized))))
96+
97+
; FIXME: what if no translations are needed
98+
99+
(define (translations->xliff-file source-language target-language filename id translations)
100+
(let ((trans-units
101+
(filter
102+
values
103+
(map (lambda (index translation)
104+
(let-values (((source-localized target-localized rest-localizeds)
105+
(partition-translation source-language target-language translation)))
106+
(if (and target-localized
107+
(equal? "" (localized-text target-localized))
108+
(not (localized-outdated? target-localized)))
109+
#f ; nothing to do
110+
(element% 'trans-unit
111+
(list (attribute% 'id (string-append id "/" (number->string index))))
112+
(list*
113+
(element% 'source '() (text-body source-localized))
114+
(element% 'target '()
115+
(list (comment "translation goes here")))
116+
(map (lambda (localized)
117+
(element% 'alt-trans '()
118+
(list
119+
(element% 'target
120+
(list (attribute% 'xml:lang (localized-language localized)))
121+
(text-body localized)))))
122+
rest-localizeds))))))
123+
(range (length translations))
124+
translations))))
125+
(and (pair? trans-units)
126+
(element% 'file
127+
(list (attribute% 'original filename)
128+
(attribute% 'source-language source-language)
129+
(attribute% 'target-language target-language)
130+
(attribute% 'datatype "plaintext"))
131+
(list (element% 'body
132+
'()
133+
trans-units))))))
134+
135+
(define (file->xliff source-language target-language filename)
136+
(let-values (((id translations) (file-collect-translations filename))
137+
((base name must-be-dir?) (split-path filename)))
138+
(translations->xliff-file source-language target-language (path->string name) id translations)))
139+
140+
(define (files->xliff-document source-language target-language filenames)
141+
(document (prolog (list (p-i #f #f 'xml "version=\"1.0\" encoding=\"UTF-8\""))
142+
#f
143+
'())
144+
(element% 'xliff
145+
(list
146+
(attribute% 'version "1.2")
147+
(attribute% 'xmlns "urn:oasis:names:tc:xliff:document:1.2"))
148+
(filter values
149+
(map (lambda (filename)
150+
(file->xliff source-language target-language filename))
151+
filenames)))
152+
'()))
153+

0 commit comments

Comments
 (0)