1 /* Title: Pure/General/symbol.scala
2 Author: Makarius
3
4 Detecting and recoding Isabelle symbols.
5 */
6
7 package isabelle
8
9 import scala.io.Source
10 import scala.collection.mutable
11 import scala.util.matching.Regex
12
13
14 object Symbol
15 {
16 type Symbol = String
17
18
19 /* spaces */
20
21 val spc = ' '
22 val space = " "
23
24 private val static_spaces = space * 4000
25
26 def spaces(k: Int ): String =
27 {
28 require(k >= 0)
29 if (k < static_spaces.length) static_spaces.substring(0, k)
30 else space * k
31 }
32
33
34 /* ASCII characters */
35
36 def is_ascii_letter(c: Char ): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
37 def is_ascii_digit(c: Char ): Boolean = '0' <= c && c <= '9'
38 def is_ascii_quasi(c: Char ): Boolean = c == '_' || c == '\' '
39
40 def is_ascii_letdig(c: Char ): Boolean =
41 is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
42
43 def is_ascii_identifier(s: String): Boolean =
44 s.length > 0 && is_ascii_letter(s(0)) && s.substring(1).forall(is_ascii_letdig)
45
46
47 /* Symbol regexps */
48
49 private val plain = new Regex("" "(?xs)
50 [^\r\\\ud800-\udfff\ufffd] | [\ud800-\udbff][\udc00-\udfff] "" ")
51
52 private val physical_newline = new Regex("" "(?xs) \n | \r\n | \r " "" )
53
54 private val symbol = new Regex("" "(?xs)
55 \\ < (?:
56 \^? [A-Za-z][A-Za-z0-9_']* |
57 \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >"" ")
58
59 private val malformed_symbol = new Regex("(?xs) (?!" + symbol + ")" +
60 "" " [\ud800-\udbff\ufffd] | \\<\^? " "" )
61
62 val regex_total =
63 new Regex(plain + "|" + physical_newline + "|" + symbol + "|" + malformed_symbol + "| ." )
64
65
66 /* basic matching */
67
68 def is_plain(c: Char ): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff' )
69
70 def is_physical_newline(s: Symbol): Boolean =
71 s == "\n" || s == "\r" || s == "\r\n"
72
73 def is_malformed(s: Symbol): Boolean =
74 !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches
75
76 class Matcher(text: CharSequence)
77 {
78 private val matcher = regex_total.pattern.matcher(text)
79 def apply(start: Int , end: Int ): Int =
80 {
81 require(0 <= start && start < end && end <= text.length)
82 if (is_plain(text.charAt(start))) 1
83 else {
84 matcher.region(start, end).lookingAt
85 matcher.group.length
86 }
87 }
88 }
89
90
91 /* iterator */
92
93 private val char_symbols: Array[Symbol] =
94 (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
95
96 def iterator(text: CharSequence): Iterator[Symbol] =
97 new Iterator[Symbol]
98 {
99 private val matcher = new Matcher(text)
100 private var i = 0
101 def hasNext = i < text.length
102 def next =
103 {
104 val n = matcher(i, text.length)
105 val s =
106 if (n == 0) ""
107 else if (n == 1) {
108 val c = text.charAt(i)
109 if (c < char_symbols.length) char_symbols(c)
110 else text.subSequence(i, i + n).toString
111 }
112 else text.subSequence(i, i + n).toString
113 i += n
114 s
115 }
116 }
117
118 def explode(text: CharSequence): List[Symbol] = iterator(text).toList
119
120
121 /* decoding offsets */
122
123 class Index(text: CharSequence)
124 {
125 sealed case class Entry(chr: Int , sym: Int )
126 val index: Array[Entry] =
127 {
128 val matcher = new Matcher(text)
129 val buf = new mutable .ArrayBuffer[Entry]
130 var chr = 0
131 var sym = 0
132 while (chr < text.length) {
133 val n = matcher(chr, text.length)
134 chr += n
135 sym += 1
136 if (n > 1) buf += Entry(chr, sym)
137 }
138 buf.toArray
139 }
140 def decode(sym1: Int ): Int =
141 {
142 val sym = sym1 - 1
143 val end = index.length
144 def bisect(a: Int , b: Int ): Int =
145 {
146 if (a < b) {
147 val c = (a + b) / 2
148 if (sym < index(c).sym) bisect(a, c)
149 else if (c + 1 == end || sym < index(c + 1).sym) c
150 else bisect(c + 1, b)
151 }
152 else -1
153 }
154 val i = bisect(0, end)
155 if (i < 0) sym
156 else index(i).chr + sym - index(i).sym
157 }
158 def decode(range: Text.Range): Text.Range = range.map(decode(_))
159 }
160
161
162 /* recoding text */
163
164 private class Recoder(list: List[(String, String)])
165 {
166 private val (min, max) =
167 {
168 var min = '\uffff'
169 var max = '\u0000'
170 for ((x, _) <- list) {
171 val c = x(0)
172 if (c < min) min = c
173 if (c > max) max = c
174 }
175 (min, max)
176 }
177 private val table =
178 {
179 var tab = Map[String, String]()
180 for ((x, y) <- list) {
181 tab.get(x) match {
182 case None => tab += (x -> y)
183 case Some(z) =>
184 error("Duplicate mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
185 }
186 }
187 tab
188 }
189 def recode(text: String): String =
190 {
191 val len = text.length
192 val matcher = regex_total.pattern.matcher(text)
193 val result = new StringBuilder(len)
194 var i = 0
195 while (i < len) {
196 val c = text(i)
197 if (min <= c && c <= max) {
198 matcher.region(i, len).lookingAt
199 val x = matcher.group
200 result.append(table.get(x) getOrElse x)
201 i = matcher.end
202 }
203 else { result.append(c); i += 1 }
204 }
205 result.toString
206 }
207 }
208
209
210
211 /** symbol interpretation **/
212
213 private lazy val symbols =
214 new Interpretation(
215 Isabelle_System.try_read(Path.split(Isabelle_System.getenv_strict("ISABELLE_SYMBOLS" ))))
216
217 private class Interpretation(symbols_spec: String)
218 {
219 /* read symbols */
220
221 private val empty = new Regex("" "(?xs) ^\s* (?: \#.* )? $ " "" )
222 private val key = new Regex("" "(?xs) (.+): " "" )
223
224 private def read_decl(decl: String): (Symbol, Map[String, String]) =
225 {
226 def err() = error("Bad symbol declaration: " + decl)
227
228 def read_props(props: List[String]): Map[String, String] =
229 {
230 props match {
231 case Nil => Map()
232 case _ :: Nil => err()
233 case key(x) :: y :: rest => read_props(rest) + (x -> y)
234 case _ => err()
235 }
236 }
237 decl.split("\\s+" ).toList match {
238 case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))
239 case _ => err()
240 }
241 }
242
243 private val symbols: List[(Symbol, Map[String, String])] =
244 Map((
245 for (decl <- split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches)
246 yield read_decl(decl)): _*) toList
247
248
249 /* misc properties */
250
251 val names: Map[Symbol, String] =
252 {
253 val name = new Regex("" "\\<\^?([A-Za-z][A-Za-z0-9_']*)>" "" )
254 Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*)
255 }
256
257 val abbrevs: Map[Symbol, String] =
258 Map((
259 for ((sym, props) <- symbols if props.isDefinedAt("abbrev" ))
260 yield (sym -> props("abbrev" ))): _*)
261
262
263 /* recoding */
264
265 private val (decoder, encoder) =
266 {
267 val mapping =
268 for {
269 (sym, props) <- symbols
270 val code =
271 try { Integer.decode(props("code" )).intValue }
272 catch {
273 case _: NoSuchElementException => error("Missing code for symbol " + sym)
274 case _: NumberFormatException => error("Bad code for symbol " + sym)
275 }
276 val ch = new String(Character.toChars(code))
277 } yield {
278 if (code < 128) error("Illegal ASCII code for symbol " + sym)
279 else (sym, ch)
280 }
281 (new Recoder(mapping),
282 new Recoder(mapping map { case (x, y) => (y, x) }))
283 }
284
285 def decode(text: String): String = decoder.recode(text)
286 def encode(text: String): String = encoder.recode(text)
287
288 private def recode_set(elems: String*): Set[String] =
289 {
290 val content = elems.toList
291 Set((content ::: content.map(decode)): _*)
292 }
293
294 private def recode_map[A](elems: (String, A)*): Map[String, A] =
295 {
296 val content = elems.toList
297 Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*)
298 }
299
300
301 /* user fonts */
302
303 val fonts: Map[Symbol, String] =
304 recode_map((
305 for ((sym, props) <- symbols if props.isDefinedAt("font" ))
306 yield (sym -> props("font" ))): _*)
307
308 val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList
309 val font_index: Map[String, Int ] = Map((font_names zip (0 until font_names.length).toList): _*)
310
311
312 /* classification */
313
314 val letters = recode_set(
315 "A" , "B" , "C" , "D" , "E" , "F" , "G" , "H" , "I" , "J" , "K" , "L" , "M" ,
316 "N" , "O" , "P" , "Q" , "R" , "S" , "T" , "U" , "V" , "W" , "X" , "Y" , "Z" ,
317 "a" , "b" , "c" , "d" , "e" , "f" , "g" , "h" , "i" , "j" , "k" , "l" , "m" ,
318 "n" , "o" , "p" , "q" , "r" , "s" , "t" , "u" , "v" , "w" , "x" , "y" , "z" ,
319
320 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
321 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
322 "\\" , "\\"
, "\\" , "\\" , "\\" , "\\" , "\\" ,
323 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
324 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
325 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" , "\\"
,
326 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
327 "\\" , "\\" , "\\" ,
328
329 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
330 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
331 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
332 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
333 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
334 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
335 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
336 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
337 "\\" , "\\" , "\\" , "\\" ,
338
339 "\\" , "\\" , "\\" , "\\" , "\\" ,
340 "\\" , "\\" , "\\" , "\\" , "\\" ,
341 "\\" , "\\" , "\\" , "\\" , "\\" , "\\" ,
342 "\\" , "\\" , "\\" , "\\" , "\\" ,
343 "\\" , "\\" , "\\" , "\\" , "\\" ,
344 "\\" , "\\" , "\\" , "\\" , "\\" ,
345 "\\" , "\\" ,
346
347 "\\<^isub>" , "\\<^isup>" )
348
349 val blanks =
350 recode_set(space, "\t" , "\n" , "\u000B" , "\f" , "\r" , "\\" , "\\<^newline>" )
351
352 val sym_chars =
353 Set("!" , "#" , "$" , "%" , "&" , "*" , "+" , "-" , "/" , "<" , "=" , ">" , "?" , "@" , "^" , "_" , "|" , "~" )
354
355 val symbolic = recode_set((for { (sym, _) <- symbols; if raw_symbolic(sym) } yield sym): _*)
356
357
358 /* control symbols */
359
360 val ctrl_decoded: Set[Symbol] =
361 Set((for ((sym, _) <- symbols if sym.startsWith("\\<^" )) yield decode(sym)): _*)
362
363 val sub_decoded = decode("\\<^sub>" )
364 val sup_decoded = decode("\\<^sup>" )
365 val isub_decoded = decode("\\<^isub>" )
366 val isup_decoded = decode("\\<^isup>" )
367 val bsub_decoded = decode("\\<^bsub>" )
368 val esub_decoded = decode("\\<^esub>" )
369 val bsup_decoded = decode("\\<^bsup>" )
370 val esup_decoded = decode("\\<^esup>" )
371 val bold_decoded = decode("\\<^bold>" )
372 }
373
374
375 /* tables */
376
377 def names: Map[Symbol, String] = symbols.names
378 def abbrevs: Map[Symbol, String] = symbols.abbrevs
379
380 def decode(text: String): String = symbols.decode(text)
381 def encode(text: String): String = symbols.encode(text)
382
383 def fonts: Map[Symbol, String] = symbols.fonts
384 def font_names: List[String] = symbols.font_names
385 def font_index: Map[String, Int ] = symbols.font_index
386 def lookup_font(sym: Symbol): Option[Int ] = symbols.fonts.get(sym).map(font_index(_))
387
388
389 /* classification */
390
391 def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym)
392 def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9'
393 def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'"
394 def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)
395 def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym)
396
397 def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)
398 def is_symbolic(sym: Symbol): Boolean = raw_symbolic(sym) || symbols.symbolic.contains(sym)
399
400 private def raw_symbolic(sym: Symbol): Boolean =
401 sym.startsWith("\\<" ) && sym.endsWith(">" ) && !sym.startsWith("\\<^" )
402
403
404
405
406 /* control symbols */
407
408 def is_ctrl(sym: Symbol): Boolean =
409 sym.startsWith("\\<^" ) || symbols.ctrl_decoded.contains(sym)
410
411 def is_controllable(sym: Symbol): Boolean =
412 !is_blank(sym) && !is_ctrl(sym) && !is_malformed(sym)
413
414 def sub_decoded: Symbol = symbols.sub_decoded
415 def sup_decoded: Symbol = symbols.sup_decoded
416 def isub_decoded: Symbol = symbols.isub_decoded
417 def isup_decoded: Symbol = symbols.isup_decoded
418 def bsub_decoded: Symbol = symbols.bsub_decoded
419 def esub_decoded: Symbol = symbols.esub_decoded
420 def bsup_decoded: Symbol = symbols.bsup_decoded
421 def esup_decoded: Symbol = symbols.esup_decoded
422 def bold_decoded: Symbol = symbols.bold_decoded
423 }
quality 91%
¤ Dauer der Verarbeitung: 0.3 Sekunden
(vorverarbeitet)
¤
*© Formatika GbR, Deutschland