1
0

erlang.php 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. <?php
  2. /*
  3. * Erlang.
  4. *
  5. * Various comments refer to section numbers in the official spec, which can
  6. * be found at http://www.erlang.org/download/erl_spec47.ps.gz
  7. */
  // Scanner for Erlang source code.
  //
  // Section numbers mentioned in the comments below refer to the Erlang
  // specification cited in the header of this file.
  class LuminousErlangScanner extends LuminousSimpleScanner {

    // Filter for STRING tokens: wraps io:format-style control sequences
    // (a '~' followed by either a run of digits or any single character) in
    // <INTERPOLATION> tags. No proper reference was found for this syntax.
    //
    // $token is a token array whose element [1] holds the token text.
    // Returns the token untouched when it contains no '~'; otherwise the
    // token is first passed through LuminousUtils::escape_token() and then
    // has the tags inserted into element [1].
    public static function str_filter($token) {
      // strpos() returns 0 for a match at the very start of the string, so
      // the comparison has to be strict to distinguish that from "not found".
      if (strpos($token[1], '~') === false) {
        return $token;
      }
      $token = LuminousUtils::escape_token($token);
      $token[1] = preg_replace(
        '/~(?:\d+|.)/',
        '<INTERPOLATION>$0</INTERPOLATION>',
        $token[1]
      );
      return $token;
    }

    // Helper: generates a regex which matches a non-empty run of digits that
    // are valid in the given base, case-insensitively.
    //
    // $base must be an integer with 2 <= $base <= 16.
    // e.g. base 2 gives '/(?i:[0-1])+/' and base 16 gives '/(?i:[0-9a-f])+/'.
    public static function build_based_int_regex($base) {
      assert(2 <= $base && $base <= 16);
      $regex = '/(?i:[0-';
      if ($base <= 10) {
        // highest decimal digit allowed in this base
        $regex .= (string)($base - 1);
      } else {
        // all decimal digits, then letters up to the highest hex digit
        $regex .= '9a-' . strtolower(dechex($base - 1));
      }
      $regex .= '])+/';
      return $regex;
    }

    // 3.11 integers are pretty strange, you are allowed to specify a base
    // 2 <= b <= 16 arbitrarily, written as base#digits.
    //
    // Override callback for the BASED_INT rule. $matches is the match array of
    // the BASED_INT pattern: [0] is the whole prefix (optional sign, base and
    // '#') and [1] is the base as a digit string.
    public function based_int($matches) {
      $base = $matches[1];
      $match = $matches[0];
      // move past the prefix which has already been matched
      $this->pos_shift(strlen($match));

      // only try to read digits if the base is one we can build a regex for
      $number = null;
      if ($base >= 2 && $base <= 16) {
        $number = $this->scan($this->build_based_int_regex((int)$base));
      }
      if ($number !== null) {
        $match .= $number;
      }
      $this->record($match, 'NUMERIC');

      // now we're going to greedily consume any trailing digits.
      // This handles the case e.g. 2#001122: we don't want the '22' to get
      // caught as a separate literal, we want to make sure it's NOT
      // highlighted as a literal, so we consume it here and record it with
      // no token type.
      if ($this->scan('/\d+/') !== null) {
        $this->record($this->match(), null);
      }
    }

    // Stream filter handling the module:function syntax. For every token
    // whose text is ':', an IDENT token immediately before it is retagged
    // 'OBJ' and an IDENT token immediately after it is retagged 'OO'.
    //
    // $tokens is a list of token arrays where element [0] is the token type
    // and element [1] is the token text. Returns the modified list.
    public static function oo_stream_filter($tokens) {
      $count = count($tokens);
      // Every token is inspected, including the final one, so that an
      // identifier which ends the stream directly after a ':' is retagged too.
      for ($i = 0; $i < $count; $i++) {
        if ($tokens[$i][1] !== ':') {
          continue;
        }
        if ($i > 0 && $tokens[$i - 1][0] === 'IDENT') {
          $tokens[$i - 1][0] = 'OBJ';
        }
        if ($i + 1 < $count) {
          if ($tokens[$i + 1][0] === 'IDENT') {
            $tokens[$i + 1][0] = 'OO';
          }
          // the token after the ':' has been dealt with, step over it
          $i++;
        }
      }
      return $tokens;
    }

    // Sets up the scanner: swaps in the Erlang-specific filters and registers
    // the token patterns and identifier mappings.
    public function init() {
      // replace the 'oo-syntax' stream filter with the Erlang-specific one
      // and drop the 'comment-to-doc' filter
      $this->remove_stream_filter('oo-syntax');
      $this->remove_filter('comment-to-doc');
      $this->add_stream_filter('oo-syntax', array($this, 'oo_stream_filter'));
      $this->add_filter('interpolation', 'STRING', array($this, 'str_filter'));

      // 3.6 - technically should include the newline, but doesn't really matter
      $this->add_pattern('COMMENT', '/%.*/');
      // stuff like -module, -author
      $this->add_pattern('KEYWORD', '/^-(?:[a-z_]\w*)\\b/m');
      // 3.11 integer with radix; the digits are handled by based_int()
      $this->add_pattern('BASED_INT', '/[+\\-]?(\d+)#/');
      $this->overrides['BASED_INT'] = array($this, 'based_int');
      // float
      $this->add_pattern('NUMERIC', '/[+\\-]?\d+\.\d+([eE][+\\-]?\d+)?/');
      // int
      $this->add_pattern('NUMERIC', '/[+\\-]?\d+/');

      // 3.7 defines some 'separators', included are . : | || ; , ? -> and #
      // we'll capture these separately to operators
      // and map it to a keyword, for lack of anything better
      $this->add_pattern('SEPARATOR', '/\\|\\||->|[\\.:\\|;,?#]/');
      $this->rule_tag_map['SEPARATOR'] = 'KEYWORD';

      // 3.9
      $this->add_pattern('OPERATOR', '%==|/=|=:=|=<|>=|\\+\\+|--|<-|[+\\-*=!<>/]%');
      // 3.9 named ops
      $this->add_identifier_mapping('OPERATOR', array('div', 'rem', 'or', 'xor',
        'bor', 'bxor', 'bsl', 'bsr', 'and', 'band', 'not', 'bnot'));

      // char literals occur after a '$'
      $this->add_pattern('CHARACTER', '/\\$(?:(?:\\\\(?:\\^\w+|\d+|.))|.)/');
      $this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR);
      // this looks like a string, but is in fact an 'atom'
      // we'll call it a value,
      $this->add_pattern('VALUE', LuminousTokenPresets::$SINGLE_STR);
      $this->add_pattern('IDENT', '/[a-z][@\w]*/');
      $this->add_pattern('VARIABLE', '/[A-Z][@\w]*/');

      // 3.8
      $this->add_identifier_mapping('KEYWORD', array('after', 'begin', 'case',
        'catch', 'cond', 'end', 'fun', 'if', 'let', 'of', 'query', 'receive',
        'when',
        // reserved, but undefined:
        'all_true', 'some_true'
      ));
      $this->add_identifier_mapping('VALUE', array('true', 'false'));

      // from the BIF section
      $this->add_identifier_mapping('FUNCTION', array(
        'atom', 'binary', 'constant', 'float', 'integer', 'function', 'list',
        'number', 'pid', 'port', 'reference', 'tuple', 'atom_to_list',
        'list_to_atom', 'abs', 'float_to_list', 'integer_to_list',
        'list_to_float', 'list_to_integer', 'round', 'trunc', 'binary_to_list',
        'binary_to_term', 'concat_binary', 'list_to_binary', 'size',
        'split_binary', 'term_to_binary', 'element', 'list_to_tuple',
        'setelement', 'tuple_to_list', 'hd', 'length', 'tl',
        'check_process_code', 'delete_module', 'load_module', 'preloaded',
        'purge_module', 'module_loaded', 'apply', 'exit', 'group_leader',
        'link', 'list_to_pid', 'pid_to_list', 'process_flag', 'process_info',
        'processes', 'self', 'spawn', 'spawn_link', 'unlink', 'erase', 'get',
        'get_keys', 'put', 'disconnect_node', 'get_cookie', 'halt', 'is_alive',
        'monitor_node', 'node', 'nodes', 'set_cookie', 'set_node',
        'statistics', 'register', 'registered', 'unregister', 'whereis',
        'open_port', 'port_close', 'port_info', 'ports', 'date', 'hash',
        'make_ref', 'now', 'throw', 'time', 'acos', 'asin', 'atan', 'atan2',
        'cos', 'cosh', 'exp', 'log', 'log10', 'pi', 'pow', 'sin', 'sinh',
        'tan', 'tanh'));
    }

    // Heuristic score for how likely $src is to be Erlang source.
    //
    // Adds 0.02 for each of '-module', '-author', '-export' and '-include'
    // found anywhere in $src, 0.01 for a ' ++ ' operator, 0.05 for something
    // shaped like name#name, and 0.05 for a line starting with '%%'.
    // $info is not used. Returns the accumulated score as a float.
    public static function guess_language($src, $info) {
      $p = 0.0;
      foreach (array('module', 'author', 'export', 'include') as $s) {
        if (strpos($src, '-' . $s) !== false) {
          $p += 0.02;
        }
      }
      if (strpos($src, ' ++ ') !== false) {
        $p += 0.01;
      }
      if (preg_match('/[a-zA-Z_]\w*#[a-zA-Z_]+/', $src)) {
        $p += 0.05;
      }
      // doc comment
      if (preg_match('/^%%/m', $src)) {
        $p += 0.05;
      }
      return $p;
    }
  }