dataStoreRecovery_test.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. //go:build !PSIPHON_USE_BADGER_DB && !PSIPHON_USE_FILES_DB
  2. // +build !PSIPHON_USE_BADGER_DB,!PSIPHON_USE_FILES_DB
  3. /*
  4. * Copyright (c) 2019, Psiphon Inc.
  5. * All rights reserved.
  6. *
  7. * This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. *
  20. */
  21. package psiphon
  22. import (
  23. "context"
  24. "fmt"
  25. "io/ioutil"
  26. "os"
  27. "path/filepath"
  28. "strings"
  29. "sync"
  30. "testing"
  31. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  32. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
  33. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
  34. )
  35. // Set canTruncateOpenDataStore to false on platforms, such as Windows, where
  36. // the OS doesn't allow an open memory-mapped file to be truncated. This will
  37. // skip the associated test cases.
  38. var canTruncateOpenDataStore = true
  39. func TestBoltResiliency(t *testing.T) {
  40. testDataDirName, err := ioutil.TempDir("", "psiphon-bolt-recovery-test")
  41. if err != nil {
  42. t.Fatalf("TempDir failed: %s", err)
  43. }
  44. defer os.RemoveAll(testDataDirName)
  45. SetEmitDiagnosticNotices(true, true)
  46. clientConfigJSON := `
  47. {
  48. "ClientPlatform" : "",
  49. "ClientVersion" : "0000000000000000",
  50. "SponsorId" : "0000000000000000",
  51. "PropagationChannelId" : "0",
  52. "DisableTactics" : true,
  53. "ConnectionWorkerPoolSize" : 1,
  54. "TunnelConnectTimeoutSeconds" : 1,
  55. "EstablishTunnelTimeoutSeconds" : 0,
  56. "EstablishTunnelPausePeriodSeconds" : 1
  57. }`
  58. clientConfig, err := LoadConfig([]byte(clientConfigJSON))
  59. if err != nil {
  60. t.Fatalf("LoadConfig failed: %s", err)
  61. }
  62. clientConfig.DataRootDirectory = testDataDirName
  63. err = clientConfig.Commit(false)
  64. if err != nil {
  65. t.Fatalf("Commit failed: %s", err)
  66. }
  67. serverEntryCount := 100
  68. noticeCandidateServers := make(chan struct{}, 1)
  69. noticeExiting := make(chan struct{}, 1)
  70. noticeOpenResetDatastore := make(chan struct{}, 1)
  71. noticeDatastoreFailed := make(chan struct{}, 1)
  72. noticeFailedResetDatastore := make(chan struct{}, 1)
  73. err = SetNoticeWriter(NewNoticeReceiver(
  74. func(notice []byte) {
  75. noticeType, payload, err := GetNotice(notice)
  76. if err != nil {
  77. return
  78. }
  79. printNotice := false
  80. switch noticeType {
  81. case "CandidateServers":
  82. count := int(payload["count"].(float64))
  83. if count != serverEntryCount {
  84. t.Fatalf("unexpected server entry count: %d", count)
  85. }
  86. select {
  87. case noticeCandidateServers <- struct{}{}:
  88. default:
  89. }
  90. case "Exiting":
  91. select {
  92. case noticeExiting <- struct{}{}:
  93. default:
  94. }
  95. case "Warning":
  96. message := payload["message"].(string)
  97. var channel chan struct{}
  98. if strings.Contains(message, "tryDatastoreOpenDB: reset") {
  99. channel = noticeOpenResetDatastore
  100. } else if strings.Contains(message, "datastore has failed") {
  101. channel = noticeDatastoreFailed
  102. } else if strings.Contains(message, "reset failed datastore") {
  103. channel = noticeFailedResetDatastore
  104. }
  105. if channel != nil {
  106. select {
  107. case channel <- struct{}{}:
  108. default:
  109. }
  110. }
  111. }
  112. if printNotice {
  113. fmt.Printf("%s\n", string(notice))
  114. }
  115. }))
  116. if err != nil {
  117. t.Fatalf("error setting notice writer: %s", err)
  118. }
  119. defer ResetNoticeWriter()
  120. drainNoticeChannel := func(channel chan struct{}) {
  121. for {
  122. select {
  123. case <-channel:
  124. default:
  125. return
  126. }
  127. }
  128. }
  129. drainNoticeChannels := func() {
  130. drainNoticeChannel(noticeCandidateServers)
  131. drainNoticeChannel(noticeExiting)
  132. drainNoticeChannel(noticeOpenResetDatastore)
  133. drainNoticeChannel(noticeDatastoreFailed)
  134. drainNoticeChannel(noticeFailedResetDatastore)
  135. }
  136. // Paving sufficient server entries, then truncating the datastore file to
  137. // remove some server entry data, then iterating over all server entries (to
  138. // produce the CandidateServers output) triggers datastore corruption
  139. // detection and reset/recovery.
  140. paveServerEntries := func() {
  141. for i := 0; i < serverEntryCount; i++ {
  142. n := 16
  143. fields := make(protocol.ServerEntryFields)
  144. fields["ipAddress"] = fmt.Sprintf("127.0.0.%d", i+1)
  145. fields["sshPort"] = 2222
  146. fields["sshUsername"] = prng.HexString(n)
  147. fields["sshPassword"] = prng.HexString(n)
  148. fields["sshHostKey"] = prng.HexString(n)
  149. fields["capabilities"] = []string{"SSH", "ssh-api-requests"}
  150. fields["region"] = "US"
  151. fields["configurationVersion"] = 1
  152. fields.SetLocalSource(protocol.SERVER_ENTRY_SOURCE_EMBEDDED)
  153. fields.SetLocalTimestamp(
  154. common.TruncateTimestampToHour(common.GetCurrentTimestamp()))
  155. err = StoreServerEntry(fields, true)
  156. if err != nil {
  157. t.Fatalf("StoreServerEntry failed: %s", err)
  158. }
  159. }
  160. }
  161. startController := func() func() {
  162. controller, err := NewController(clientConfig)
  163. if err != nil {
  164. t.Fatalf("NewController failed: %s", err)
  165. }
  166. ctx, cancelFunc := context.WithCancel(context.Background())
  167. controllerWaitGroup := new(sync.WaitGroup)
  168. controllerWaitGroup.Add(1)
  169. go func() {
  170. defer controllerWaitGroup.Done()
  171. controller.Run(ctx)
  172. }()
  173. return func() {
  174. cancelFunc()
  175. controllerWaitGroup.Wait()
  176. }
  177. }
  178. corruptDataStore := func() {
  179. filename := filepath.Join(testDataDirName, "ca.psiphon.PsiphonTunnel.tunnel-core", "datastore", "psiphon.boltdb")
  180. file, err := os.OpenFile(filename, os.O_RDWR, 0666)
  181. if err != nil {
  182. t.Fatalf("OpenFile failed: %s", err)
  183. }
  184. defer file.Close()
  185. fileInfo, err := file.Stat()
  186. if err != nil {
  187. t.Fatalf("Stat failed: %s", err)
  188. }
  189. _, err = file.WriteAt(prng.Bytes(int(fileInfo.Size())), 0)
  190. if err != nil {
  191. t.Fatalf("WriteAt failed: %s", err)
  192. }
  193. err = file.Sync()
  194. if err != nil {
  195. t.Fatalf("Sync failed: %s", err)
  196. }
  197. }
  198. truncateDataStore := func() {
  199. filename := filepath.Join(testDataDirName, "ca.psiphon.PsiphonTunnel.tunnel-core", "datastore", "psiphon.boltdb")
  200. file, err := os.OpenFile(filename, os.O_RDWR, 0666)
  201. if err != nil {
  202. t.Fatalf("OpenFile failed: %s", err)
  203. }
  204. defer file.Close()
  205. fileInfo, err := file.Stat()
  206. if err != nil {
  207. t.Fatalf("Stat failed: %s", err)
  208. }
  209. err = file.Truncate(fileInfo.Size() / 4)
  210. if err != nil {
  211. t.Fatalf("Truncate failed: %s", err)
  212. }
  213. err = file.Sync()
  214. if err != nil {
  215. t.Fatalf("Sync failed: %s", err)
  216. }
  217. }
  218. // Populate datastore with 100 server entries.
  219. err = OpenDataStore(clientConfig)
  220. if err != nil {
  221. t.Fatalf("OpenDataStore failed: %s", err)
  222. }
  223. paveServerEntries()
  224. stopController := startController()
  225. <-noticeCandidateServers
  226. stopController()
  227. CloseDataStore()
  228. drainNoticeChannels()
  229. // Corrupt datastore file before running controller; expect a datastore
  230. // "reset" notice on OpenDataStore.
  231. t.Logf("test: recover from datastore corrupted before opening")
  232. corruptDataStore()
  233. err = OpenDataStore(clientConfig)
  234. if err != nil {
  235. t.Fatalf("OpenDataStore failed: %s", err)
  236. }
  237. <-noticeOpenResetDatastore
  238. if !canTruncateOpenDataStore {
  239. CloseDataStore()
  240. return
  241. }
  242. paveServerEntries()
  243. // Truncate datastore while running the controller. First, complete one
  244. // successful data scan (CandidateServers). The next scan should trigger a
  245. // datastore "failed" notice and datastore reset.
  246. t.Logf("test: detect corrupt datastore while running")
  247. stopController = startController()
  248. <-noticeCandidateServers
  249. truncateDataStore()
  250. <-noticeDatastoreFailed
  251. <-noticeFailedResetDatastore
  252. <-noticeExiting
  253. stopController()
  254. CloseDataStore()
  255. drainNoticeChannels()
  256. // Restart successfully after previous failure reset and shutdown.
  257. t.Logf("test: after restart, recover from reset datastore")
  258. err = OpenDataStore(clientConfig)
  259. if err != nil {
  260. t.Fatalf("OpenDataStore failed: %s", err)
  261. }
  262. paveServerEntries()
  263. stopController = startController()
  264. <-noticeCandidateServers
  265. stopController()
  266. CloseDataStore()
  267. }