transforms.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. /*
  2. * Copyright (c) 2022, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. // Package transforms provides a mechanism to define and apply string data
  20. // transformations, with the transformations defined by regular expressions
  21. // to match data to be transformed, and regular expression generators to
  22. // specify additional or replacement data.
  23. package transforms
  24. import (
  25. "regexp"
  26. "regexp/syntax"
  27. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  28. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
  29. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/regen"
  30. )
  31. const (
  32. SCOPE_ANY = ""
  33. )
  34. // Spec is a transform spec. A spec is a list of individual transforms to be
  35. // applied in order. Each transform is defined by two elements: a regular
  36. // expression to by matched against the input; and a regular expression
  37. // generator which generates new data. Subgroups from the regular expression
  38. // may be specified in the regular expression generator, and are populated
  39. // with the subgroup match, and in this way parts of the original matching
  40. // data may be retained in the transformed data.
  41. //
  42. // For example, with the transform [2]string{"([a-b])", "\\$\\
  43. // {1\\}c"}, substrings consisting of the characters 'a' and 'b' will be
  44. // transformed into the same substring with a single character 'c' appended.
  45. type Spec [][2]string
  46. // Specs is a set of named Specs.
  47. type Specs map[string]Spec
  48. // Validate checks that all entries in a set of Specs is well-formed, with
  49. // valid regular expressions.
  50. func (specs Specs) Validate(prefixMode bool) error {
  51. seed, err := prng.NewSeed()
  52. if err != nil {
  53. return errors.Trace(err)
  54. }
  55. for _, spec := range specs {
  56. // Call Apply to compile/validate the regular expressions and generators.
  57. if prefixMode {
  58. if len(spec) != 1 || len(spec[0]) != 2 {
  59. return errors.TraceNew("prefix mode requires exactly one transform")
  60. }
  61. _, _, err := spec.ApplyPrefix(seed, 0)
  62. if err != nil {
  63. return errors.Trace(err)
  64. }
  65. } else {
  66. _, err := spec.ApplyString(seed, "")
  67. if err != nil {
  68. return errors.Trace(err)
  69. }
  70. }
  71. }
  72. return nil
  73. }
  74. // ScopedSpecNames groups a list of Specs, referenced by their Spec name, with
  75. // the group defined by a scope. The meaning of scope depends on the context
  76. // in which the transforms are to be used.
  77. //
  78. // For example, in the context of DNS request transforms, the scope is the DNS
  79. // server for which a specific group of transforms is known to be effective.
  80. //
  81. // The scope name "" is SCOPE_ANY, and matches any input scope name when there
  82. // is no specific entry for that scope name in ScopedSpecNames.
  83. type ScopedSpecNames map[string][]string
  84. // Validate checks that the ScopedSpecNames is well-formed and referenced Spec
  85. // names are defined in the corresponding input specs.
  86. func (scopedSpecs ScopedSpecNames) Validate(specs Specs) error {
  87. for _, scoped := range scopedSpecs {
  88. for _, specName := range scoped {
  89. _, ok := specs[specName]
  90. if !ok {
  91. return errors.Tracef("undefined spec name: %s", specName)
  92. }
  93. }
  94. }
  95. return nil
  96. }
  97. // Select picks a Spec from Specs based on the input scope and scoping rules.
  98. // If the input scope name is defined in scopedSpecs, that match takes
  99. // precedence. Otherwise SCOPE_ANY is selected, when present.
  100. //
  101. // After the scope is resolved, Select randomly selects from the matching Spec
  102. // list.
  103. //
  104. // Select will return "", nil when no selection can be made.
  105. func (specs Specs) Select(scope string, scopedSpecs ScopedSpecNames) (string, Spec) {
  106. if scope != SCOPE_ANY {
  107. scoped, ok := scopedSpecs[scope]
  108. if ok {
  109. // If the specific scope is defined but empty, this means select
  110. // nothing -- don't fall through to SCOPE_ANY.
  111. if len(scoped) == 0 {
  112. return "", nil
  113. }
  114. specName := scoped[prng.Intn(len(scoped))]
  115. spec, ok := specs[specName]
  116. if !ok {
  117. // specName is not found in specs, which should not happen if
  118. // Validate passes; select nothing in this case.
  119. return "", nil
  120. }
  121. return specName, spec
  122. }
  123. // Fall through to SCOPE_ANY.
  124. }
  125. anyScope, ok := scopedSpecs[SCOPE_ANY]
  126. if !ok || len(anyScope) == 0 {
  127. // No SCOPE_ANY, or SCOPE_ANY is an empty list.
  128. return "", nil
  129. }
  130. specName := anyScope[prng.Intn(len(anyScope))]
  131. spec, ok := specs[specName]
  132. if !ok {
  133. return "", nil
  134. }
  135. return specName, spec
  136. }
  137. // ApplyPrefix unlike other Apply methods, does not apply the Spec to an input.
  138. // It instead generates a sequence of bytes according to the Spec, and returns
  139. // at least minLength bytes if the Spec generates fewer than minLength bytes.
  140. //
  141. // The input seed is used for all random number generation. The same seed can be
  142. // supplied to produce the same output, for replay.
  143. func (spec Spec) ApplyPrefix(seed *prng.Seed, minLength int) ([]byte, int, error) {
  144. if len(spec) != 1 || len(spec[0]) != 2 {
  145. return nil, 0, errors.TraceNew("prefix mode requires exactly one transform")
  146. }
  147. rng := prng.NewPRNGWithSeed(seed)
  148. args := &regen.GeneratorArgs{
  149. RngSource: rng,
  150. ByteMode: true,
  151. }
  152. gen, err := regen.NewGenerator(spec[0][1], args)
  153. if err != nil {
  154. return nil, 0, errors.Trace(err)
  155. }
  156. prefix, err := gen.Generate()
  157. if err != nil {
  158. return nil, 0, errors.Trace(err)
  159. }
  160. prefixLen := len(prefix)
  161. if len(prefix) < minLength {
  162. // Add random padding to fill up to minLength.
  163. padding := rng.Bytes(minLength - len(prefix))
  164. prefix = append(prefix, padding...)
  165. }
  166. return prefix, prefixLen, nil
  167. }
  168. // ApplyString applies the Spec to the input string, producing the output string.
  169. //
  170. // The input seed is used for all random generation. The same seed can be
  171. // supplied to produce the same output, for replay.
  172. func (spec Spec) ApplyString(seed *prng.Seed, input string) (string, error) {
  173. value := input
  174. for _, transform := range spec {
  175. re, replacement, err := makeRegexAndRepl(seed, transform)
  176. if err != nil {
  177. return "", errors.Trace(err)
  178. }
  179. value = re.ReplaceAllString(value, string(replacement))
  180. }
  181. return value, nil
  182. }
  183. // Apply applies the Spec to the input bytes, producing the output bytes.
  184. //
  185. // The input seed is used for all random generation. The same seed can be
  186. // supplied to produce the same output, for replay.
  187. func (spec Spec) Apply(seed *prng.Seed, input []byte) ([]byte, error) {
  188. value := input
  189. for _, transform := range spec {
  190. re, replacement, err := makeRegexAndRepl(seed, transform)
  191. if err != nil {
  192. return nil, errors.Trace(err)
  193. }
  194. value = re.ReplaceAll(value, replacement)
  195. }
  196. return value, nil
  197. }
  198. // makeRegexAndRepl generates the regex and replacement for a given seed and
  199. // transform. The same seed can be supplied to produce the same output, for
  200. // replay.
  201. func makeRegexAndRepl(seed *prng.Seed, transform [2]string) (*regexp.Regexp, []byte, error) {
  202. // TODO: the compiled regexp and regen could be cached, but the seed is an
  203. // issue with caching the regen.
  204. args := &regen.GeneratorArgs{
  205. RngSource: prng.NewPRNGWithSeed(seed),
  206. Flags: syntax.OneLine | syntax.NonGreedy,
  207. }
  208. rg, err := regen.NewGenerator(transform[1], args)
  209. if err != nil {
  210. return nil, nil, errors.Trace(err)
  211. }
  212. replacement, err := rg.Generate()
  213. if err != nil {
  214. return nil, nil, errors.Trace(err)
  215. }
  216. re, err := regexp.Compile(transform[0])
  217. if err != nil {
  218. return nil, nil, errors.Trace(err)
  219. }
  220. return re, replacement, nil
  221. }