summaryrefslogtreecommitdiff
path: root/test/e2e-go/cli/goal/expect/catchpointCatchupTest.exp
blob: 0a3b8ea48eb5f3f6ab73008c87103e09227a72ae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#!/usr/bin/expect -f
set err 0
log_user 1

# catchpointCatchupTest - test overview
#
# The goal of the test is to demonstrate the catchpoint catchup functionality using the goal command line interface.
# It does that by deploying a single relay, which advances until it generates a catchpoint.
# Once it does, another node is started, and instructed to catch up using the catchpoint from the first relay.
# To make sure that the second node won't be using the "regular" catchup, we tunnel all the communication between the two using a proxy.
# The proxy is responsible for filtering out block number 2. This prevents the "regular" catchup from working,
# and makes a good test ground for the catchpoint catchup.
#
# The second test is a variation of the first one, but we also stop the node that is catching up in the middle of its catchup process.
# That allows us to verify that the node persists its catchpoint catchup mode correctly.
#

# spawnCatchpointCatchupWebProxy - build and launch the catchpointCatchupWebProxy helper.
#
# Arguments:
#   TARGET_ENDPOINT - forwarded to the proxy as its -targetEndpoint option
#   RUNTIME         - forwarded to the proxy as its -runtime option
#   REQUEST_DELAY   - forwarded to the proxy as its -requestDelay option
#
# Side effects:
#   The caller's WP_SPAWN_ID variable is set to the spawn id of the running proxy,
#   so that the caller can close the proxy later on.
#
# Returns:
#   The "address:port" string that the proxy printed at the start of its output.
#   The test is aborted if the build or the proxy times out, or if no listening
#   address could be obtained.
proc spawnCatchpointCatchupWebProxy { TARGET_ENDPOINT RUNTIME REQUEST_DELAY } {
    upvar WP_SPAWN_ID WP_SPAWN_ID
    set WEBPROXY_LISTEN_ADDRESS ""

    # compile the catchpointCatchupWebProxy, so we can kick it off quickly later on.
    # this can take a while, especially if we compiled just the build-race version.
    set timeout 240
    spawn go install ./catchpointCatchupWebProxy
    expect {
        timeout {::AlgorandGoal::Abort "timed out compiling catchpointCatchupWebProxy"}
        eof { ::AlgorandGoal::CheckEOF "failed to compile catchpointCatchupWebProxy"}
    }

    # the binary is built by now, so the proxy is expected to report its address quickly.
    set timeout 5
    spawn catchpointCatchupWebProxy -targetEndpoint "$TARGET_ENDPOINT" -runtime $RUNTIME -requestDelay $REQUEST_DELAY
    set WP_SPAWN_ID $spawn_id
    expect {
        -re {(^[0-9\.]+:[0-9]+)} { set WEBPROXY_LISTEN_ADDRESS $expect_out(1,string) }
        timeout {::AlgorandGoal::Abort "timed out waiting for web proxy listen address"}
        eof { ::AlgorandGoal::CheckEOF "web proxy failed to start"}
    }

    # the eof branch above may return without an address having been captured; fail right here
    # instead of handing an empty address back to the caller.
    if { $WEBPROXY_LISTEN_ADDRESS == "" } {
        ::AlgorandGoal::Abort "web proxy listening address could not be retrieved"
    }

    puts "[clock format [clock seconds] -format %H:%M:%S]: Web proxy listening address is $WEBPROXY_LISTEN_ADDRESS"
    return $WEBPROXY_LISTEN_ADDRESS
}

# Basic test: bring up the primary relay, wait for it to produce a catchpoint, then start the
# second node behind the web proxy and have it catch up using that catchpoint.
# Any Tcl error raised inside the block is reported through ::AlgorandGoal::Abort.
if { [catch {
    source goalExpectCommon.exp
    set TEST_ALGO_DIR [lindex $argv 0]
    set TEST_DATA_DIR [lindex $argv 1]

    puts "TEST_ALGO_DIR: $TEST_ALGO_DIR"
    puts "TEST_DATA_DIR: $TEST_DATA_DIR"

    set TIME_STAMP [clock seconds]

    set TEST_ROOT_DIR $TEST_ALGO_DIR/root
    set TEST_PRIMARY_NODE_DIR $TEST_ROOT_DIR/Primary/
    set NETWORK_NAME test_net_expect_$TIME_STAMP
    set NETWORK_TEMPLATE "$TEST_DATA_DIR/nettemplates/CatchpointCatchupTestNetwork.json"

    # copy the consensus protocol to this data directory.
    exec mkdir -p $TEST_ALGO_DIR
    exec cp $TEST_DATA_DIR/consensus/catchpointtestingprotocol.json $TEST_ALGO_DIR/consensus.json

    # Create network
    ::AlgorandGoal::CreateNetwork $NETWORK_NAME $NETWORK_TEMPLATE $TEST_ALGO_DIR $TEST_ROOT_DIR

    # Update the Primary Node configuration
    exec -- cat "$TEST_ROOT_DIR/Primary/config.json" | jq {. |= . + {"MaxAcctLookback": 2, "CatchpointInterval": 4,"EnableRequestLogger":true}} > $TEST_ROOT_DIR/Primary/config.json.new
    exec rm $TEST_ROOT_DIR/Primary/config.json
    exec mv $TEST_ROOT_DIR/Primary/config.json.new $TEST_ROOT_DIR/Primary/config.json

    # Update the Second Node configuration
    exec -- cat "$TEST_ROOT_DIR/Node/config.json" | jq {. |= . + {"MaxAcctLookback": 2, "CatchupParallelBlocks":2}} > $TEST_ROOT_DIR/Node/config.json.new
    exec rm $TEST_ROOT_DIR/Node/config.json
    exec mv $TEST_ROOT_DIR/Node/config.json.new $TEST_ROOT_DIR/Node/config.json


    set ::GLOBAL_TEST_ALGO_DIR $TEST_ALGO_DIR
    set ::GLOBAL_TEST_ROOT_DIR $TEST_ROOT_DIR
    set ::GLOBAL_NETWORK_NAME $NETWORK_NAME

    # Start the Primary Node
    ::AlgorandGoal::StartNode $TEST_ROOT_DIR/Primary


    # Wait until the primary node reaches round 37. At that point, the catchpoint for round 36 is already done.
    # The rationale is the following:
    # 1. MaxTxnLife = 23 and CatchpointLookback = 8 so catchup would load blocks 36-8-23..36 = 5..36
    # 2. Loading block 2 is blocked by the catchpoint proxy
    # 3. Next block is 37 that would require balances from round 37-MaxBalLookback = 29 to be accessed, and this is
    # within the expected range of 5...36
    ::AlgorandGoal::WaitForRound 37 $TEST_ROOT_DIR/Primary

    # Get primary node listening address:
    set PRIMARY_LISTEN_ADDRESS ""
    spawn cat $TEST_ROOT_DIR/Primary/algod-listen.net
    expect {
        -re {http:\/\/([0-9\.]+:[0-9]+)} { set PRIMARY_LISTEN_ADDRESS $expect_out(1,string); exp_continue;}
        timeout {::AlgorandGoal::Abort "timed out listing $TEST_ROOT_DIR/Primary/algod-listen.net"}
        eof { ::AlgorandGoal::CheckEOF "Unable to list $TEST_ROOT_DIR/Primary/algod-listen.net" }
    }

    if { $PRIMARY_LISTEN_ADDRESS == "" } {
        ::AlgorandGoal::StopNode $TEST_ROOT_DIR/Primary
        puts "Primary node listening address could not be retrieved."
        exit 1
    }

    puts "[clock format [clock seconds] -format %H:%M:%S]: Primary node listening address is $PRIMARY_LISTEN_ADDRESS"

    # start the web proxy. WP_SPAWN_ID is overwritten by spawnCatchpointCatchupWebProxy with the proxy's spawn id.
    set WP_SPAWN_ID 0
    set WEBPROXY_LISTEN_ADDRESS [spawnCatchpointCatchupWebProxy $PRIMARY_LISTEN_ADDRESS 30 20]

    ::AlgorandGoal::StartNode $TEST_ROOT_DIR/Node False $WEBPROXY_LISTEN_ADDRESS

    # once the node is started we can clear the ::GLOBAL_TEST_ALGO_DIR, so that shutdown would be done as a network.
    # The variable is emptied rather than unset, the same way the stop/start test does it, so that a later
    # read of it still finds a defined value.
    set ::GLOBAL_TEST_ALGO_DIR ""

    ::AlgorandGoal::WaitForRound 1 $TEST_ROOT_DIR/Node

    set CATCHPOINT [::AlgorandGoal::GetNodeLastCatchpoint $TEST_ROOT_DIR/Primary]

    puts "[clock format [clock seconds] -format %H:%M:%S]: Catchpoint is $CATCHPOINT"

    # the catchpoint label has the form round#hash; extract the round and fail with a clear message
    # when the label cannot be parsed, instead of tripping over a missing or empty variable later on.
    if { ![regexp -nocase {([0-9]*)#[A-Z2-7]*} $CATCHPOINT -> CATCHPOINT_ROUND] || $CATCHPOINT_ROUND eq "" } {
        error "unable to extract the catchpoint round from catchpoint label '$CATCHPOINT'"
    }

    puts "[clock format [clock seconds] -format %H:%M:%S]: Catchpoint round is $CATCHPOINT_ROUND"

    # wait for the primary to reach $CATCHPOINT_ROUND + 5, so that the catchpoint file would be saved
    ::AlgorandGoal::WaitForRound [expr {int($CATCHPOINT_ROUND + 5)}] $TEST_ROOT_DIR/Primary

    ::AlgorandGoal::StartCatchup $TEST_ROOT_DIR/Node $CATCHPOINT

    ::AlgorandGoal::WaitForRound $CATCHPOINT_ROUND $TEST_ROOT_DIR/Node

    ::AlgorandGoal::StopNode $TEST_ROOT_DIR/Node

    # close the web proxy
    close -i $WP_SPAWN_ID

    puts "[clock format [clock seconds] -format %H:%M:%S]: catchpointCatchupTest basic test completed"

} EXCEPTION ] } {
   ::AlgorandGoal::Abort "ERROR in catchpointCatchupTest - basic test: $EXCEPTION"
}

    # basic testing is done. Now, let's try to see if we can stop and resume a node.
# Stop/start test: reset the second node, begin a catchpoint catchup through a slowed-down proxy,
# stop the node while the catchup is in progress, then restart it and wait for it to reach the
# catchpoint round. Relies on PRIMARY_LISTEN_ADDRESS, CATCHPOINT and CATCHPOINT_ROUND set by the basic test.
# Any Tcl error raised inside the block is reported through ::AlgorandGoal::Abort.
if { [catch {
    # delete the node's log and every test_net* entry under the node directory, so that the node would be on round 0 again.
    file delete -force $TEST_ROOT_DIR/Node/node.log
    foreach path [glob $TEST_ROOT_DIR/Node/test_net*] {
        file delete -force -- $path
    }

    # start the proxy. this time, make it go really slow
    # (the call also refreshes WP_SPAWN_ID with the spawn id of the new proxy instance)
    set WEBPROXY_LISTEN_ADDRESS [spawnCatchpointCatchupWebProxy $PRIMARY_LISTEN_ADDRESS 30 1500]

    ::AlgorandGoal::StartNode $TEST_ROOT_DIR/Node False $WEBPROXY_LISTEN_ADDRESS

    set NODE_ROUND [::AlgorandGoal::WaitForRound 1 $TEST_ROOT_DIR/Node]

    if { $NODE_ROUND >= 35 } {
        # it means that we haven't reset the directory correctly.
        ::AlgorandGoal::Abort "Node $TEST_ROOT_DIR/Node should have been reset, but it didn't"
    }

    ::AlgorandGoal::StartCatchup $TEST_ROOT_DIR/Node $CATCHPOINT

    # wait for the node to start catching up for up to 35 seconds. ( it won't take that long, but it's not immediate either ).
    ::AlgorandGoal::WaitCatchup $TEST_ROOT_DIR/Node 35

    # stop the node while the catchup is still in progress.
    ::AlgorandGoal::StopNode $TEST_ROOT_DIR/Node

    # close the web proxy
    close -i $WP_SPAWN_ID

    # restart the web proxy, without the delaying part.
    set WEBPROXY_LISTEN_ADDRESS [spawnCatchpointCatchupWebProxy $PRIMARY_LISTEN_ADDRESS 30 20]

    ::AlgorandGoal::StartNode $TEST_ROOT_DIR/Node False $WEBPROXY_LISTEN_ADDRESS

    # once the node is started we can clear the ::GLOBAL_TEST_ALGO_DIR, so that shutdown would be done as a network.
    set ::GLOBAL_TEST_ALGO_DIR ""

    # the restarted node is expected to carry on and reach the catchpoint round.
    ::AlgorandGoal::WaitForRound $CATCHPOINT_ROUND $TEST_ROOT_DIR/Node

    ::AlgorandGoal::StopNode $TEST_ROOT_DIR/Node

    # close the web proxy
    close -i $WP_SPAWN_ID

    ::AlgorandGoal::StopNode $TEST_ROOT_DIR/Primary

    puts "[clock format [clock seconds] -format %H:%M:%S]: catchpointCatchupTest stop/start test completed"
} EXCEPTION ] } {
   ::AlgorandGoal::Abort "ERROR in catchpointCatchupTest - stop/start: $EXCEPTION"
}