regexp_format.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. /*
  2. Copyright 2014 Zachary Klippenstein
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. /*
  14. * Copyright (c) 2023, Psiphon Inc.
  15. * All rights reserved.
  16. *
  17. * This program is free software: you can redistribute it and/or modify
  18. * it under the terms of the GNU General Public License as published by
  19. * the Free Software Foundation, either version 3 of the License, or
  20. * (at your option) any later version.
  21. *
  22. * This program is distributed in the hope that it will be useful,
  23. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  24. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  25. * GNU General Public License for more details.
  26. *
  27. * You should have received a copy of the GNU General Public License
  28. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  29. *
  30. */
  31. package regen
  32. import (
  33. "bytes"
  34. "fmt"
  35. "io"
  36. "regexp/syntax"
  37. )
  38. // inspectRegexpToString returns a string describing a regular expression.
  39. func inspectRegexpToString(r *syntax.Regexp) string {
  40. var buffer bytes.Buffer
  41. inspectRegexpToWriter(&buffer, r)
  42. return buffer.String()
  43. }
  44. func inspectRegexpToWriter(w io.Writer, r ...*syntax.Regexp) {
  45. for _, regexp := range r {
  46. inspectWithIndent(regexp, "", w)
  47. }
  48. }
  49. func inspectWithIndent(r *syntax.Regexp, indent string, w io.Writer) {
  50. fmt.Fprintf(w, "%s{\n", indent)
  51. fmt.Fprintf(w, "%s Op: %s\n", indent, opToString(r.Op))
  52. fmt.Fprintf(w, "%s Flags: %x\n", indent, r.Flags)
  53. if len(r.Sub) > 0 {
  54. fmt.Fprintf(w, "%s Sub: [\n", indent)
  55. for _, subR := range r.Sub {
  56. inspectWithIndent(subR, indent+" ", w)
  57. }
  58. fmt.Fprintf(w, "%s ]\n", indent)
  59. } else {
  60. fmt.Fprintf(w, "%s Sub: []\n", indent)
  61. }
  62. fmt.Fprintf(w, "%s Rune: %s (%s)\n", indent, runesToUTF8(r.Rune...), runesToDecimalString(r.Rune))
  63. fmt.Fprintf(w, "%s [Min, Max]: [%d, %d]\n", indent, r.Min, r.Max)
  64. fmt.Fprintf(w, "%s Cap: %d\n", indent, r.Cap)
  65. fmt.Fprintf(w, "%s Name: %s\n", indent, r.Name)
  66. }
  67. // runesToUTF8 converts a slice of runes to the Unicode string they represent.
  68. func runesToUTF8(runes ...rune) []byte {
  69. var buffer bytes.Buffer
  70. for _, r := range runes {
  71. buffer.WriteRune(r)
  72. }
  73. return buffer.Bytes()
  74. }
  75. // runesToBytes converst a slice of runes to a slice of bytes.
  76. // Returns an error if runes not in the range [0-255].
  77. func runesToBytes(runes ...rune) ([]byte, error) {
  78. var buffer bytes.Buffer
  79. for _, r := range runes {
  80. if r < 0 || r > 255 {
  81. return nil, fmt.Errorf("RunesToBytes: rune out of range")
  82. }
  83. buffer.WriteByte(byte(r))
  84. }
  85. return buffer.Bytes(), nil
  86. }
  87. // RunesToDecimalString converts a slice of runes to their comma-separated decimal values.
  88. func runesToDecimalString(runes []rune) string {
  89. var buffer bytes.Buffer
  90. for _, r := range runes {
  91. buffer.WriteString(fmt.Sprintf("%d, ", r))
  92. }
  93. return buffer.String()
  94. }
  95. // opToString gets the string name of a regular expression operation.
  96. func opToString(op syntax.Op) string {
  97. switch op {
  98. case syntax.OpNoMatch:
  99. return "OpNoMatch"
  100. case syntax.OpEmptyMatch:
  101. return "OpEmptyMatch"
  102. case syntax.OpLiteral:
  103. return "OpLiteral"
  104. case syntax.OpCharClass:
  105. return "OpCharClass"
  106. case syntax.OpAnyCharNotNL:
  107. return "OpAnyCharNotNL"
  108. case syntax.OpAnyChar:
  109. return "OpAnyChar"
  110. case syntax.OpBeginLine:
  111. return "OpBeginLine"
  112. case syntax.OpEndLine:
  113. return "OpEndLine"
  114. case syntax.OpBeginText:
  115. return "OpBeginText"
  116. case syntax.OpEndText:
  117. return "OpEndText"
  118. case syntax.OpWordBoundary:
  119. return "OpWordBoundary"
  120. case syntax.OpNoWordBoundary:
  121. return "OpNoWordBoundary"
  122. case syntax.OpCapture:
  123. return "OpCapture"
  124. case syntax.OpStar:
  125. return "OpStar"
  126. case syntax.OpPlus:
  127. return "OpPlus"
  128. case syntax.OpQuest:
  129. return "OpQuest"
  130. case syntax.OpRepeat:
  131. return "OpRepeat"
  132. case syntax.OpConcat:
  133. return "OpConcat"
  134. case syntax.OpAlternate:
  135. return "OpAlternate"
  136. }
  137. panic(fmt.Sprintf("invalid op: %d", op))
  138. }