dataStoreRecovery_test.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. // +build !PSIPHON_USE_BADGER_DB,!PSIPHON_USE_FILES_DB
  2. /*
  3. * Copyright (c) 2019, Psiphon Inc.
  4. * All rights reserved.
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. *
  19. */
  20. package psiphon
  21. import (
  22. "context"
  23. "fmt"
  24. "io/ioutil"
  25. "os"
  26. "path/filepath"
  27. "strings"
  28. "sync"
  29. "testing"
  30. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  31. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
  32. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
  33. )
  34. // Set canTruncateOpenDataStore to false on platforms, such as Windows, where
  35. // the OS doesn't allow an open memory-mapped file to be truncated. This will
  36. // skip the associated test cases.
  37. var canTruncateOpenDataStore = true
  38. func TestBoltResiliency(t *testing.T) {
  39. testDataDirName, err := ioutil.TempDir("", "psiphon-bolt-recovery-test")
  40. if err != nil {
  41. t.Fatalf("TempDir failed: %s", err)
  42. }
  43. defer os.RemoveAll(testDataDirName)
  44. SetEmitDiagnosticNotices(true, true)
  45. clientConfigJSON := `
  46. {
  47. "ClientPlatform" : "",
  48. "ClientVersion" : "0",
  49. "SponsorId" : "0",
  50. "PropagationChannelId" : "0",
  51. "ConnectionWorkerPoolSize" : 10,
  52. "EstablishTunnelTimeoutSeconds" : 1,
  53. "EstablishTunnelPausePeriodSeconds" : 1
  54. }`
  55. clientConfig, err := LoadConfig([]byte(clientConfigJSON))
  56. if err != nil {
  57. t.Fatalf("LoadConfig failed: %s", err)
  58. }
  59. clientConfig.DataRootDirectory = testDataDirName
  60. err = clientConfig.Commit(false)
  61. if err != nil {
  62. t.Fatalf("Commit failed: %s", err)
  63. }
  64. serverEntryCount := 100
  65. noticeCandidateServers := make(chan struct{}, 1)
  66. noticeExiting := make(chan struct{}, 1)
  67. noticeResetDatastore := make(chan struct{}, 1)
  68. noticeDatastoreFailed := make(chan struct{}, 1)
  69. SetNoticeWriter(NewNoticeReceiver(
  70. func(notice []byte) {
  71. noticeType, payload, err := GetNotice(notice)
  72. if err != nil {
  73. return
  74. }
  75. printNotice := false
  76. switch noticeType {
  77. case "CandidateServers":
  78. count := int(payload["count"].(float64))
  79. if count != serverEntryCount {
  80. t.Fatalf("unexpected server entry count: %d", count)
  81. }
  82. select {
  83. case noticeCandidateServers <- struct{}{}:
  84. default:
  85. }
  86. case "Exiting":
  87. select {
  88. case noticeExiting <- struct{}{}:
  89. default:
  90. }
  91. case "Alert":
  92. message := payload["message"].(string)
  93. var channel chan struct{}
  94. if strings.Contains(message, "tryDatastoreOpenDB: reset") {
  95. channel = noticeResetDatastore
  96. } else if strings.Contains(message, "datastore has failed") {
  97. channel = noticeDatastoreFailed
  98. }
  99. if channel != nil {
  100. select {
  101. case channel <- struct{}{}:
  102. default:
  103. }
  104. }
  105. }
  106. if printNotice {
  107. fmt.Printf("%s\n", string(notice))
  108. }
  109. }))
  110. drainNoticeChannel := func(channel chan struct{}) {
  111. for {
  112. select {
  113. case channel <- struct{}{}:
  114. default:
  115. return
  116. }
  117. }
  118. }
  119. drainNoticeChannels := func() {
  120. drainNoticeChannel(noticeCandidateServers)
  121. drainNoticeChannel(noticeExiting)
  122. drainNoticeChannel(noticeResetDatastore)
  123. drainNoticeChannel(noticeDatastoreFailed)
  124. }
  125. // Paving sufficient server entries, then truncating the datastore file to
  126. // remove some server entry data, then iterating over all server entries (to
  127. // produce the CandidateServers output) triggers datastore corruption
  128. // detection and, at start up, reset/recovery.
  129. paveServerEntries := func() {
  130. for i := 0; i < serverEntryCount; i++ {
  131. n := 16
  132. fields := make(protocol.ServerEntryFields)
  133. fields["ipAddress"] = fmt.Sprintf("127.0.0.%d", i+1)
  134. fields["sshPort"] = 2222
  135. fields["sshUsername"] = prng.HexString(n)
  136. fields["sshPassword"] = prng.HexString(n)
  137. fields["sshHostKey"] = prng.HexString(n)
  138. fields["capabilities"] = []string{"SSH", "ssh-api-requests"}
  139. fields["region"] = "US"
  140. fields["configurationVersion"] = 1
  141. fields.SetLocalSource(protocol.SERVER_ENTRY_SOURCE_EMBEDDED)
  142. fields.SetLocalTimestamp(
  143. common.TruncateTimestampToHour(common.GetCurrentTimestamp()))
  144. err = StoreServerEntry(fields, true)
  145. if err != nil {
  146. t.Fatalf("StoreServerEntry failed: %s", err)
  147. }
  148. }
  149. }
  150. startController := func() func() {
  151. controller, err := NewController(clientConfig)
  152. if err != nil {
  153. t.Fatalf("NewController failed: %s", err)
  154. }
  155. ctx, cancelFunc := context.WithCancel(context.Background())
  156. controllerWaitGroup := new(sync.WaitGroup)
  157. controllerWaitGroup.Add(1)
  158. go func() {
  159. defer controllerWaitGroup.Done()
  160. controller.Run(ctx)
  161. }()
  162. return func() {
  163. cancelFunc()
  164. controllerWaitGroup.Wait()
  165. }
  166. }
  167. truncateDataStore := func() {
  168. filename := filepath.Join(testDataDirName, "ca.psiphon.PsiphonTunnel.tunnel-core", "datastore", "psiphon.boltdb")
  169. configFile, err := os.OpenFile(filename, os.O_RDWR, 0666)
  170. if err != nil {
  171. t.Fatalf("OpenFile failed: %s", err)
  172. }
  173. defer configFile.Close()
  174. fileInfo, err := configFile.Stat()
  175. if err != nil {
  176. t.Fatalf("Stat failed: %s", err)
  177. }
  178. err = configFile.Truncate(fileInfo.Size() / 4)
  179. if err != nil {
  180. t.Fatalf("Truncate failed: %s", err)
  181. }
  182. err = configFile.Sync()
  183. if err != nil {
  184. t.Fatalf("Sync failed: %s", err)
  185. }
  186. }
  187. // Populate datastore with 100 server entries.
  188. err = OpenDataStore(clientConfig)
  189. if err != nil {
  190. t.Fatalf("OpenDataStore failed: %s", err)
  191. }
  192. paveServerEntries()
  193. stopController := startController()
  194. <-noticeCandidateServers
  195. stopController()
  196. CloseDataStore()
  197. drainNoticeChannels()
  198. // Truncate datastore file before running controller; expect a datastore
  199. // "reset" notice on OpenDataStore.
  200. t.Logf("test: recover from datastore corrupted before opening")
  201. truncateDataStore()
  202. err = OpenDataStore(clientConfig)
  203. if err != nil {
  204. t.Fatalf("OpenDataStore failed: %s", err)
  205. }
  206. <-noticeResetDatastore
  207. if !canTruncateOpenDataStore {
  208. CloseDataStore()
  209. return
  210. }
  211. paveServerEntries()
  212. // Truncate datastore while running the controller. First, complete one
  213. // successful data scan (CandidateServers). The next scan should trigger a
  214. // datastore "failed" notice.
  215. t.Logf("test: detect corrupt datastore while running")
  216. stopController = startController()
  217. <-noticeCandidateServers
  218. truncateDataStore()
  219. <-noticeDatastoreFailed
  220. <-noticeExiting
  221. stopController()
  222. CloseDataStore()
  223. drainNoticeChannels()
  224. // Restart successfully after previous failure shutdown.
  225. t.Logf("test: after restart, recover from datastore corrupted while running")
  226. err = OpenDataStore(clientConfig)
  227. if err != nil {
  228. t.Fatalf("OpenDataStore failed: %s", err)
  229. }
  230. <-noticeResetDatastore
  231. paveServerEntries()
  232. stopController = startController()
  233. <-noticeCandidateServers
  234. stopController()
  235. CloseDataStore()
  236. }