bnf.php 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. <?php
  2. /*
  3. * BNF has a lot of different variants and matching them all is pretty much
  4. * impossible.
  5. *
  6. * We're going to match the standard BNF and extended BNF and hopefully a
  7. * few very similar dialects
  8. */
  9. class LuminousBNFScanner extends LuminousStatefulScanner {
  10. function user_def_ext($matches) {
  11. if ($matches[1] !== '')
  12. $this->record($matches[1], null);
  13. $this->record_token($matches[2], 'USER_FUNCTION');
  14. $this->user_defs[$matches[2]] = 'VALUE';
  15. $this->pos_shift(strlen($matches[1]) + strlen($matches[2]));
  16. }
  17. private function set_strict() {
  18. // no transition table necessary, I think
  19. $this->add_pattern('COMMENT', '/<![^>]*>/');
  20. $this->add_pattern('KEYWORD', '/(?<=^<)[^>]+(?=>)/m');
  21. $this->add_pattern('KEYWORD', '/(?<=^\\{)[^\\}]+(?=\\})/m');
  22. $this->add_pattern('VALUE', '/(?<=\\{)[^\\}]+(?=\\})/');
  23. $this->add_pattern('VALUE', '/[\\-\w]+/');
  24. }
  25. private function set_extended() {
  26. $this->add_pattern('COMMENT', '/\\(\\* .*? \\*\\)/sx');
  27. $this->add_pattern('OPTION', '/\\[/', '/\\]/');
  28. $this->add_pattern('REPETITION', '/\\{/', '/\\}/');
  29. $this->add_pattern('GROUP', '/\\(/', '/\\)/');
  30. $this->add_pattern('SPECIAL', '/\\?/', '/\\?/');
  31. $ident = '(?:[\w\\-]+)';
  32. $this->add_pattern('RULE', "/(^[ \t]*)($ident)(\s*(?![[:alnum:]\s]))/mi");
  33. $this->overrides['RULE'] = array($this, 'user_def_ext');
  34. $this->add_pattern('IDENT', "/$ident/");
  35. // technically I don't know if we really need to worry about a transition
  36. // table, but here we are anyway
  37. $all = array('COMMENT', 'OPTION', 'REPETITION', 'GROUP', 'SPECIAL',
  38. 'STRING', 'IDENT', 'OPERATOR');
  39. $almost_all = array_filter($all, create_function('$x',
  40. 'return $x !== "SPECIAL";'));
  41. $this->transitions = array(
  42. 'initial' => array_merge(array('RULE'), $all),
  43. 'OPTION' => $all,
  44. 'REPETITION' => $all,
  45. 'GROUP' => $all,
  46. 'SPECIAL' => $almost_all
  47. );
  48. $this->rule_tag_map = array(
  49. 'OPTION' => null,
  50. 'REPETITION' => null,
  51. 'GROUP' => null,
  52. 'SPECIAL' => null
  53. );
  54. }
  55. function init() {
  56. // the original BNF uses <angle brackets> to delimit its
  57. // production rule names
  58. if (preg_match('/<\w+>/', $this->string())) {
  59. $this->set_strict();
  60. }
  61. else {
  62. $this->set_extended();
  63. }
  64. $this->add_pattern('STRING', LuminousTokenPresets::$SINGLE_STR_SL);
  65. $this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR_SL);
  66. $this->add_pattern('OPERATOR', '/[*\\-=+;:\\|,]+/');
  67. // assume a few chars at bol indicate a commented line
  68. $this->add_pattern('COMMENT', '/^[!%-;].*/m');
  69. $this->remove_filter('constant');
  70. $this->remove_filter('comment-to-doc');
  71. }
  72. static function guess_language($src, $info) {
  73. // being honest, BNF is going to be so rare that if we ever return
  74. // anything other than 0, it's more likely that we're obscuring the
  75. // correct scanner than correctly identifying BNF.
  76. return 0;
  77. }
  78. }