blob: 9abc9064bb694f4441ccab6ed76c2686d0d8ef05 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
|
# Auto generated by ./tcwg/generate-yamlfiles.sh from tcwg-update-containers.yaml.in and tcwg-update-containers/tcwg-update-jenkins-containers.def. Do not edit.
#BEGIN: tcwg/default.yaml.inc
# -*- mode: Yaml -*-
# Reusable JJB property set: access control plus build retention.
- property:
    name: default-properties
    properties:
      # Anonymous users get read-only access to the job; "everyone-flat"
      # may additionally start and cancel builds.
      - authorization:
          anonymous:
            - job-read
            - job-extended-read
          everyone-flat:
            - job-build
            - job-cancel
      # Drop build records once they are older than 30 days.
      - build-discarder:
          days-to-keep: 30
# Reusable SCM definition: git checkout of the jenkins-scripts repository
# into the "jenkins-scripts" sub-directory of the workspace.
- scm:
    name: jenkins-scripts
    scm:
      - git:
          url: https://git.linaro.org/toolchain/jenkins-scripts.git
          # Fetch every branch head and everything under refs/changes/
          # (presumably Gerrit review refs -- confirm).
          refspec: '+refs/heads/*:refs/remotes/origin/* +refs/changes/*:refs/changes/*'
          # The revision to build is taken from the job's scripts_branch
          # parameter.
          branches:
            - '$scripts_branch'
          basedir: jenkins-scripts
          skip-tag: true
          reference-repo: /home/tcwg-buildslave/snapshots-ref/jenkins-scripts.git
          # Keep the workspace between builds; only clean the checkout
          # before each build.
          wipe-workspace: false
          clean:
            before: true
          prune: true
#END: tcwg/default.yaml.inc
# Daily job that updates the Jenkins client container on TCWG nodes.
#
# Build flow:
#   1. Dry-run tcwg-update-host-containers.sh; if nothing changed, mark the
#      build UNSTABLE (exit 125) and stop.
#   2. Otherwise mark the node offline and wait for other builds to drain.
#   3. Trigger tcwg-update-jenkins-containers-online-node, then clean stale
#      workspaces and perform the real container update.
#
# NOTE(review): the file header says this file is generated from
# tcwg-update-containers.yaml.in; mirror any change there.
- job:
    name: tcwg-update-jenkins-containers
    project-type: freestyle
    defaults: global
    properties:
      - authorization:
          anonymous:
            - job-read
            - job-extended-read
          everyone-flat:
            - job-build
            - job-cancel
      - build-discarder:
          days-to-keep: 30
          num-to-keep: 100
      # At most one build of this job per node at a time.
      - throttle:
          max-per-node: 1
          option: project
    parameters:
      - label:
          name: nodes
          # Run on all nodes with exception of benchmarking boards.
          # Jenkins containers of benchmarking boards are handled by
          # CONTAINER_bmk job.
          default: 'tcwg && !tcwg-bmk-hw'
          all-nodes: true
          matching-label: 'allCases'
          description: 'Machines to run on'
      - string:
          name: distro
          default: 'default'
          description: 'Distro version to use.'
      - bool:
          name: force
          default: 'false'
          description: 'Whether to force update even with no changes in image'
      - bool:
          name: verbose
          default: 'true'
          description: 'Whether to be verbose'
      - string:
          name: scripts_branch
          default: master
          description: 'Scripts revision to use'
    disabled: false
    node: tcwg-coordinator
    concurrent: true
    display-name: 'TCWG CCC Update jenkins containers'
    # We need to unshare workspace with $NODE_NAME in the path to
    # correctly run on tcwg-bmk-* nodes.
    workspace: workspace/tcwg-update-jenkins-containers_$EXECUTOR_NUMBER/$NODE_NAME
    scm:
      - jenkins-scripts
    triggers:
      - timed: '@daily'
    wrappers:
      - timeout:
          # Wait at most 5 hours before giving up on updating jenkins
          # client container.
          timeout: 300
      - timestamps
      - ssh-agent-credentials:
          users:
            # tcwg-buildslave user id
            - 'e0958a95-204f-4c14-a66c-5e2be6c5d50a'
      - build-name:
          name: '#${BUILD_NUMBER}-${NODE_NAME}'
    builders:
      - shell:
          command: |
            #!/bin/bash
            set -ex

            # Check if we need to update the image -- run with --dryrun true.
            # The exit status of the check is captured in $res.
            ./jenkins-scripts/tcwg-update-host-containers.sh \
              --distro "$distro" \
              --dryrun true \
              --node "$NODE_NAME" \
              --force "$force" \
              --verbose "$verbose" &
            res=0 && wait $! || res=$?

            if [ "$res" = 0 ]; then
              # Fast-path exit to avoid bringing the node offline.
              echo "$NODE_NAME is up-to-date"
              # Skip the rest and mark the build UNSTABLE (aka skipped).
              exit 125
            elif [ "$res" = 125 ]; then
              echo "$NODE_NAME needs container update"
            else
              echo "ERROR: container check failed"
              exit "$res"
            fi

            # We are about to update the container that is running this.
            # The plan is:
            # 1. Prevent new builds from starting by putting the node into
            #    offline mode.
            # 2. Wait for current builds to finish.  We detect this by
            #    checking for child processes of the jenkins client.
            # 3. Trigger a job on the master node to bring this node back
            #    online.  Without this we would restart the jenkins container,
            #    but the node would still be marked as "offline" and no new
            #    builds will be scheduled to it.
            # 4. Restart the container.

            # Mark the node offline.
            ssh -p2222 -l tcwg-buildslave@linaro.org ci.linaro.org \
              offline-node "$NODE_NAME" \
              -m "Updating_jenkins_container:$BUILD_URL"

            # The online-node job is triggered by the next build step, which
            # only runs if this step succeeds.  If we fail from here on,
            # bring the node back online ourselves so that it does not stay
            # offline.  The script still exits with its original status.
            trap 'rc=$?
                  if [ "$rc" != 0 ]; then
                    ssh -p2222 -l tcwg-buildslave@linaro.org ci.linaro.org \
                      online-node "$NODE_NAME" || true
                  fi' EXIT

            start_date=$(date +%s)
            rm -f timeout

            # Wait for current builds to finish.
            while true; do
              n_busy=$(source jenkins-scripts/jenkins-helpers.sh
                       print_number_of_busy_executors "$NODE_NAME")
              if [ "$n_busy" = "1" ]; then
                # We are the only build left.
                break
              fi

              # Minutes spent waiting so far.
              elapsed=$(($(date +%s) - $start_date))
              elapsed=$(($elapsed / 60))
              if [ "$elapsed" -gt "270" ]; then
                # We'll timeout in 30 minutes; give up on the update and
                # bring the node back online.
                # We budget 30 minutes for tcwg-cleanup-stale-workspaces.sh
                # below.
                touch timeout
                break
              fi

              # Wait for other builds to complete.
              sleep 60
            done
          unstable-return: 125
      # Runs only while the build status is still SUCCESS, i.e. the node was
      # taken offline above (JJB default bounds for current-status).
      - conditional-step:
          condition-kind: current-status
          steps:
            # Fire-and-forget: the triggered job waits for this build to
            # finish and then brings the node back online.
            - trigger-builds:
                - project: tcwg-update-jenkins-containers-online-node
                  predefined-parameters: |
                    node=$NODE_NAME
                    build_num=$BUILD_NUMBER
                  block: false
            - shell:
                command: |
                  #!/bin/bash
                  set -ex

                  # The previous step gave up waiting for other builds;
                  # skip the update and mark the build UNSTABLE.
                  if [ -f timeout ]; then
                    exit 125
                  fi

                  # Cleanup workspace directory while the node is idle.
                  # This is best-effort: a cleanup failure must not prevent
                  # the container update, so handle it explicitly ("set +e"
                  # inside a subshell does not stop the outer "set -e" from
                  # aborting on the subshell's non-zero exit status).
                  "$WORKSPACE"/jenkins-scripts/tcwg-cleanup-stale-workspaces.sh \
                    --days 3 --workspace_top "$HOME"/workspace \
                    || echo "WARNING: workspace cleanup failed; continuing"

                  # Perform the real update; a failure marks the build
                  # UNSTABLE rather than FAILED.
                  ./jenkins-scripts/tcwg-update-host-containers.sh \
                    --distro "$distro" \
                    --dryrun false \
                    --node "$NODE_NAME" \
                    --force "$force" \
                    --verbose "$verbose" || exit 125
                unstable-return: 125
# Companion job of tcwg-update-jenkins-containers: polls the console log of
# build $build_num of that job and, once it reports "Finished: ...", brings
# node $node back online.  The node is also brought back online if this job
# exits for any other reason (EXIT trap).
#
# NOTE(review): the file header says this file is generated from
# tcwg-update-containers.yaml.in; mirror any change there.
- job:
    name: tcwg-update-jenkins-containers-online-node
    project-type: freestyle
    defaults: global
    properties:
      - authorization:
          anonymous:
            - job-read
            - job-extended-read
          everyone-flat:
            - job-build
            - job-cancel
      - build-discarder:
          days-to-keep: 30
          num-to-keep: 100
    parameters:
      - string:
          name: node
          default: ''
          description: 'NODE_NAME to bring online'
      - string:
          name: build_num
          default: ''
          description: 'BUILD_NUMBER to wait for to finish'
    disabled: false
    concurrent: true
    display-name: 'TCWG CCC Update jenkins containers online-node'
    wrappers:
      - timeout:
          timeout: 60
      - timestamps
      - ssh-agent-credentials:
          users:
            # tcwg-buildslave user id
            - 'e0958a95-204f-4c14-a66c-5e2be6c5d50a'
      - build-name:
          name: '#${BUILD_NUMBER}-#${build_num}-${node}'
    builders:
      - shell:
          command: |
            #!/bin/bash
            set -ex

            # Without a node name there is nothing to bring back online;
            # fail right away instead of issuing "online-node" with no name.
            if [ -z "$node" ]; then
              echo "ERROR: parameter 'node' must not be empty"
              exit 1
            fi

            # Below logic was adapted from jenkins-scripts/tcwg_bmk-build.sh:
            # benchmark().
            ssh_cmd=(ssh -p2222 -l tcwg-buildslave@linaro.org
                     -oStrictHostKeyChecking=no -oUserKnownHostsFile=/dev/null)

            # Bring the node back online as we exit due to any reason.
            # A function keeps the ssh command as an array instead of
            # flattening it into a string that is re-parsed at exit.
            online_node ()
            {
              "${ssh_cmd[@]}" ci.linaro.org online-node "$node"
            }
            trap online_node EXIT

            # ??? Below loop can fail for reasons I can't understand.
            # "|| true" should make the subshell always exit with "0",
            # and I can't see how "| tee | sed" can fail.
            # Ignore shell errors so that we exit only when we see
            # "Finished: " line in the console.log.
            set +e
            while true; do
              sleep 60
              # Dump the console log of the watched build, prefixing every
              # line with the node name; give up on a hung ssh after 1m.
              (timeout 1m \
                 "${ssh_cmd[@]}" ci.linaro.org console \
                 tcwg-update-jenkins-containers $build_num || true) \
                | tee console.log | sed -e "s/^/$node: /"
              build_status=$(tail -n 1 console.log)
              case "$build_status" in
                "Finished: "*) break ;;
              esac
            done
# checksum: 6ffc6ff4f71c52329c02ca6e6f932e10
|