scala.php 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. <?php
  2. /**
  3. * Scala
  4. *
  5. * Direct port of old luminous language file.
  6. *
  7. * TODO: The XML literals may contain embedded scala code. This is bad
  8. * because we ignore that currently, and we may, in rare circumstances,
  9. * incorrectly pop a tag when in fact it's inside a scala expression
  10. *
  11. * Some comments reference section numbers of the scala spec:
  12. * http://www.scala-lang.org/sites/default/files/linuxsoft_archives/docu/files/ScalaReference.pdf
  13. *
  14. */
  15. // scala inherits some stuff from Java
  16. require_once(dirname(__FILE__) . '/include/java_func_list.php');
  /**
   * Scanner for Scala source.
   *
   * Registers the token patterns for Scala (comments, numeric/character/string
   * literals, operators, identifiers) and installs two overrides: one for
   * nested multi-line comments and one that decides whether a '<' is an
   * operator or the start of an XML literal.
   */
  class LuminousScalaScanner extends LuminousSimpleScanner {

      /**
       * Override for the COMMENT_ML pattern (registered in init()).
       *
       * Scala's multi-line comments nest, so instead of a single regex the
       * opening and closing delimiters are handed to nestable_token(), and the
       * result is recorded under the COMMENT token name.
       */
      public function comment_override() {
          $this->nestable_token('COMMENT', '%/\\*%', '%\\*/%');
      }

      /**
       * Override for the 'lt' pattern: decides whether '<' is a comparison
       * operator or opens an XML literal, and records it accordingly.
       *
       * Disambiguation (spec section 1.5): an XML literal must be preceded by
       * whitespace, '(' or '{', and the '<' must be immediately followed by a
       * character that can start an XML name ('_' or a letter) or by '!' / '?'
       * (comments, CDATA, processing instructions). Comment tokens directly
       * before the '<' are treated as a no-op and skipped.
       *
       * @param array $matches match groups of the 'lt' pattern; $matches[0] is
       *                       the text recorded as OPERATOR when this is not XML
       */
      public function xml_override($matches) {
          $xml = false;
          // Walk backwards to the nearest token that actually contributes a
          // character in front of the '<', then decide once and stop.
          for ($i = count($this->tokens) - 1; $i >= 0; $i--) {
              $tok = $this->tokens[$i];
              if ($tok[0] === 'COMMENT') {
                  continue;
              }
              $text = $tok[1];
              $length = strlen($text);
              if ($length === 0) {
                  // An empty token has no last character; reading offset -1
                  // would raise a warning, so look at the token before it.
                  continue;
              }
              $last_char = $text[$length - 1];
              $preceded_ok = ctype_space($last_char)
                  || $last_char === '('
                  || $last_char === '{';
              // Digits are deliberately not accepted after '<': an XML name
              // cannot start with one, and allowing them turned 'a <5' into XML.
              $xml = $preceded_ok && $this->check('/<[!?a-zA-Z_]/');
              break;
          }

          if (!$xml) {
              // Plain less-than: record it as an operator and step over it.
              $this->record($matches[0], 'OPERATOR');
              $this->pos_shift(strlen($matches[0]));
              return;
          }

          // Hand the literal to an XML sub-scanner that starts at our current
          // position in the same source string.
          $subscanner = new LuminousXMLScanner();
          $subscanner->string($this->string());
          $subscanner->pos($this->pos());
          // NOTE(review): presumably tells the XML scanner to stop at the end
          // of the literal rather than at end of input - confirm in that class.
          $subscanner->xml_literal = true;
          $subscanner->init();
          $subscanner->main();
          $tagged = $subscanner->tagged();
          // Third argument is passed as true because the text comes back from
          // tagged(); presumably it marks the text as already processed - TODO
          // confirm against record().
          $this->record($tagged, 'XML', true);
          // Resume scanning where the sub-scanner stopped.
          $this->pos($subscanner->pos());
      }

      /**
       * Registers all token patterns, overrides and identifier mappings.
       */
      public function init() {
          $this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_SL);
          // Only the opening delimiter is matched here; comment_override()
          // takes over from there so that nested comments are handled.
          $this->add_pattern('COMMENT_ML', '%/\\*%');
          $this->overrides['COMMENT_ML'] = array($this, 'comment_override');

          // 1.3.1 integer literals, 1.3.2 floatingPointLiteral
          // Do the float first so it takes precedence, our scanner does not
          // follow the max-munch rule
          $exp = '(?:[eE][+-]?\d+)';
          $suffix = '[FfDd]';
          $this->add_pattern('NUMERIC', "/(?: \d+\\.\d* | \\.\d+) $exp? $suffix? /x");
          $this->add_pattern('NUMERIC', "/\d+($exp $suffix? |$exp?$suffix)/x");
          $this->add_pattern('NUMERIC', '/(?:0x[a-fA-F0-9]+|\d+)[lL]?/');

          // 1.3.4 character literals
          // we can't really parse the unicode and work out what's printable,
          // so we'll just allow any unicode sequence. Hex digits are accepted
          // in either case so that e.g. '\u00E9' is recognised.
          $this->add_pattern('CHARACTER',
              "/'
              (
                  (?:\\\\ (?:u[a-fA-F0-9]{1,4}|\d+|.))
                  | .
              )'/sx");

          // 1.3.5 - 1.3.6
          // strings are kind of pythonic, triple quoting makes them multiline
          $this->add_pattern('STRING', '/"""
              (?: [^"\\\\]+ | \\\\. | ""[^"] | "[^"])*
              (?:"""|$)/sx');
          $this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR_SL);

          // '<' gets its own pattern so xml_override() can tell an operator
          // from the start of an XML literal.
          $this->add_pattern('lt', '/</');
          $this->overrides['lt'] = array($this, 'xml_override');

          // The hyphen is escaped on purpose: an unescaped '*-=' is a range in
          // a character class and pulled in digits, '.', ',' and '<', so that
          // 'x+1' produced the operator token '+1'.
          $this->add_pattern('OPERATOR', '/[¬!%^&*\\-=+~;:|>\\/?\\\\]+/');
          $this->add_pattern('IDENT', '/[a-z_]\w*/i');

          // 1.3.3 boolean literals
          $this->add_identifier_mapping('VALUE',
              array('true', 'false', 'null', 'None'));

          // from old luminous file
          $this->add_identifier_mapping('KEYWORD', array('abstract', 'case',
              'catch', 'class', 'def', 'do', 'else', 'extends', 'final', 'finally',
              'for', 'forSome', 'if', 'implicit', 'import', 'lazy', 'match',
              'new', 'object', 'override', 'package', 'private', 'protected',
              'return', 'sealed', 'super', 'this', 'throw', 'trait', 'try', 'type',
              'val', 'var', 'while', 'with', 'yield'));
          $this->add_identifier_mapping('TYPE', array('boolean', 'byte', 'char',
              'double', 'float', 'int', 'long', 'string', 'short', 'unit',
              'Boolean', 'Byte', 'Char', 'Double', 'Float', 'Int', 'Long', 'String',
              'Short', 'Unit'));

          // from Kate's syntax file
          $this->add_identifier_mapping('TYPE', array('ActorProxy', 'ActorTask',
              'ActorThread', 'AllRef', 'Any', 'AnyRef', 'Application', 'AppliedType',
              'Array', 'ArrayBuffer', 'Attribute', 'BoxedArray', 'BoxedBooleanArray',
              'BoxedByteArray', 'BoxedCharArray', 'Buffer', 'BufferedIterator', 'Char',
              'Console', 'Enumeration', 'Fluid', 'Function', 'IScheduler',
              'ImmutableMapAdaptor', 'ImmutableSetAdaptor', 'Int', 'Iterable', 'List',
              'ListBuffer', 'None', 'Option', 'Ordered', 'Pair', 'PartialFunction',
              'Pid', 'Predef', 'PriorityQueue', 'PriorityQueueProxy', 'Reaction',
              'Ref', 'Responder', 'RichInt', 'RichString', 'Rule', 'RuleTransformer',
              'Script', 'Seq', 'SerialVersionUID', 'Some', 'Stream', 'Symbol',
              'TcpService', 'TcpServiceWorker', 'Triple', 'Unit', 'Value',
              'WorkerThread', 'serializable', 'transient', 'volatile'));
          // scala inherits some stuff from Java
          $this->add_identifier_mapping('TYPE', $GLOBALS['luminous_java_types']);
      }

      /**
       * Scores how likely $src is to be Scala by summing small heuristics.
       *
       * @param string $src  the source code to inspect
       * @param mixed  $info not used by this scanner
       * @return int|float   0 when nothing matched, otherwise the sum of the
       *                     weights of the heuristics that matched (max 0.25)
       */
      public static function guess_language($src, $info) {
          $p = 0;
          // func def, a lot like python
          if (preg_match('/\\bdef\s+\w+\s*\(/', $src)) {
              $p += 0.05;
          }
          // val x = y
          if (preg_match('/\\bval\s+\w+\s*=/', $src)) {
              $p += 0.1;
          }
          // argument types
          if (preg_match('/\\(\s*\w+\s*:\s*(String|Int|Array)/', $src)) {
              $p += 0.05;
          }
          // triple quoted strings, like python
          if (preg_match('/\'{3}|"{3}/', $src)) {
              $p += 0.05;
          }
          return $p;
      }
  }