dfa.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. package fte
  2. // #cgo CXXFLAGS: -std=c++11
  3. // #cgo LDFLAGS: -ldl ${SRCDIR}/../third_party/libs/libgmp.a
  4. // #include <stdlib.h>
  5. // #include <stdint.h>
  6. // void* _dfa_new(char *tbl, const uint32_t max_len);
  7. // void _dfa_delete(void *ptr);
  8. // int _dfa_rank(void *ptr, const char *s, const size_t ssz, char **out, size_t *sz);
  9. // int _dfa_unrank(void *ptr, const char *in, const size_t insz, char **out, size_t *sz);
  10. // char* _dfa_getNumWordsInLanguage(void *ptr, const uint32_t min_word_length, const uint32_t max_word_length, char **out, size_t *sz);
  11. import "C"
  12. import (
  13. "errors"
  14. "fmt"
  15. "math/big"
  16. "sync"
  17. "unsafe"
  18. "github.com/redjack/marionette/regex2dfa"
  19. )
  20. var (
  21. ErrLanguageIsEmptySet = errors.New("fte: language is empty set")
  22. )
  23. type DFA struct {
  24. mu sync.RWMutex
  25. ptr unsafe.Pointer
  26. capacity int
  27. regex string
  28. n int
  29. }
  30. func NewDFA(regex string, n int) (*DFA, error) {
  31. tbl, err := regex2dfa.Regex2DFA(regex)
  32. if err != nil {
  33. return nil, err
  34. }
  35. ctbl := C.CString(tbl)
  36. defer C.free(unsafe.Pointer(ctbl))
  37. ptr := C._dfa_new(ctbl, C.uint32_t(n))
  38. dfa := &DFA{ptr: ptr, regex: regex, n: n}
  39. // Calculate capacity.
  40. if err := dfa.calculateCapacity(); err != nil {
  41. dfa.Close()
  42. return nil, err
  43. }
  44. return dfa, nil
  45. }
  46. func (dfa *DFA) Close() error {
  47. if dfa.ptr != nil {
  48. C._dfa_delete(dfa.ptr)
  49. dfa.ptr = nil
  50. }
  51. return nil
  52. }
  53. // Regex returns the regex passed into the DFA.
  54. func (dfa *DFA) Regex() string { return dfa.regex }
  55. // N returns the n passed into the DFA.
  56. func (dfa *DFA) N() int { return dfa.n }
  57. // Capacity returns the capacity of the encoder.
  58. func (dfa *DFA) Capacity() int {
  59. return dfa.capacity
  60. }
  61. func (dfa *DFA) calculateCapacity() error {
  62. wordsInSlice, err := dfa.NumWordsInLanguage(dfa.n, dfa.n)
  63. if err != nil {
  64. return err
  65. } else if wordsInSlice.Cmp(big.NewInt(0)) == 0 {
  66. return ErrLanguageIsEmptySet
  67. }
  68. dfa.capacity = (Log2(wordsInSlice) - 1) / 8 // div by 8 to convert to bytes
  69. return nil
  70. }
  71. // Rank maps s into an integer ranking.
  72. func (dfa *DFA) Rank(s string) (*big.Int, error) {
  73. dfa.mu.Lock()
  74. defer dfa.mu.Unlock()
  75. cs := C.CString(s)
  76. defer C.free(unsafe.Pointer(cs))
  77. var cout *C.char
  78. var sz C.size_t
  79. errno := C._dfa_rank(dfa.ptr, cs, C.size_t(len(s)), &cout, &sz)
  80. out := C.GoStringN(cout, C.int(sz))
  81. C.free(unsafe.Pointer(cout))
  82. if errno != 0 {
  83. return nil, fmt.Errorf("fte.DFA.Rank: %s", out)
  84. }
  85. var rank big.Int
  86. if _, ok := rank.SetString(out, 10); !ok {
  87. return nil, fmt.Errorf("fte.Rank: cannot parse returned big.Int: %q", out)
  88. }
  89. return &rank, nil
  90. }
  91. // Unrank reverses the map from an integer to a string.
  92. func (dfa *DFA) Unrank(rank *big.Int) (string, error) {
  93. dfa.mu.Lock()
  94. defer dfa.mu.Unlock()
  95. rankStr := rank.String()
  96. cin := C.CString(rankStr)
  97. defer C.free(unsafe.Pointer(cin))
  98. var cout *C.char
  99. var sz C.size_t
  100. if ret := C._dfa_unrank(dfa.ptr, cin, C.size_t(len(rankStr)), &cout, &sz); ret != 0 {
  101. return "", fmt.Errorf("fte.Unrank: error")
  102. }
  103. out := C.GoStringN(cout, C.int(sz))
  104. C.free(unsafe.Pointer(cout))
  105. return out, nil
  106. }
  107. func (dfa *DFA) NumWordsInSlice(n int) (*big.Int, error) {
  108. return dfa.NumWordsInLanguage(n, n)
  109. }
  110. func (dfa *DFA) NumWordsInLanguage(min, max int) (*big.Int, error) {
  111. var cout *C.char
  112. var sz C.size_t
  113. C._dfa_getNumWordsInLanguage(dfa.ptr, C.uint32_t(min), C.uint32_t(max), &cout, &sz)
  114. out := C.GoStringN(cout, C.int(sz))
  115. C.free(unsafe.Pointer(cout))
  116. var rank big.Int
  117. if _, ok := rank.SetString(out, 10); !ok {
  118. return nil, fmt.Errorf("fte.NumWordsInLanguage: cannot parse returned big.Int: %q", out)
  119. }
  120. return &rank, nil
  121. }
  122. // Log2 returns floor(log2(v)).
  123. func Log2(v *big.Int) int {
  124. for i := 1; ; i++ {
  125. var exp big.Int
  126. exp.Exp(big.NewInt(2), big.NewInt(int64(i)), nil)
  127. if cmp := exp.Cmp(v); cmp == 0 {
  128. return i
  129. } else if cmp == 1 {
  130. return i - 1
  131. }
  132. }
  133. }