summaryrefslogtreecommitdiffstats
path: root/support/download/dl-wrapper
blob: bb70c98253b94c2a83ddc8064e2113cc64e16b29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#!/usr/bin/env bash

# This script is a wrapper to the other download backends.
# Its role is to ensure atomicity when saving downloaded files
# back to BR2_DL_DIR, and not clutter BR2_DL_DIR with partial,
# failed downloads.

# To avoid cluttering BR2_DL_DIR, we download to a trashable
# location, namely in $(BUILD_DIR).
# Then, we move the downloaded file to a temporary file in the
# same directory as the final output file.
# This allows us to finally atomically rename it to its final
# name.
# If anything goes wrong, we just remove all the temporaries
# created so far.

# We want to catch any unexpected failure, and exit immediately.
set -e

# getopts option string shared by all the download backends; exported
# so each backend helper parses the same, consistent set of flags that
# this wrapper forwards to it.
export BR_BACKEND_DL_GETOPTS=":hc:d:o:n:N:H:ru:qf:e"

# Download one file: try each candidate URI in turn with the appropriate
# backend, verify the result against the stored hashes, and atomically
# install it as the requested output file.
# Returns 0 on success; returns 1 when the file was downloaded but the
# .hash file contains no hash for it; exits 1 on unrecoverable errors
# (no URI worked, hash mismatch on a pre-existing file, I/O failure).
main() {
    local OPT OPTARG
    local backend output hfile recurse quiet rc
    local -a uris

    # Parse our options; anything after '--' is for the backend
    while getopts ":c:d:D:o:n:N:H:rf:u:q" OPT; do
        case "${OPT}" in
        c)  cset="${OPTARG}";;          # changeset/revision for VCS backends
        d)  dl_dir="${OPTARG}";;        # download directory
        D)  old_dl_dir="${OPTARG}";;    # legacy (global) download directory
        o)  output="${OPTARG}";;        # final output file
        n)  raw_base_name="${OPTARG}";; # package name without version
        N)  base_name="${OPTARG}";;     # package name with version
        H)  hfile="${OPTARG}";;         # .hash file to check against
        r)  recurse="-r";;              # recurse into submodules/externals
        f)  filename="${OPTARG}";;      # expected file name
        u)  uris+=( "${OPTARG}" );;     # candidate URIs, tried in order
        q)  quiet="-q";;
        :)  error "option '%s' expects a mandatory argument\n" "${OPTARG}";;
        \?) error "unknown option '%s'\n" "${OPTARG}";;
        esac
    done

    # Forget our options, and keep only those for the backend
    shift $((OPTIND-1))

    if [ -z "${output}" ]; then
        error "no output specified, use -o\n"
    fi

    # Legacy handling: check if the file already exists in the global
    # download directory. If it does, hard-link it. If it turns out it
    # was an incorrect download, we'd still check it below anyway.
    # If we can neither link nor copy, fallback to doing a download.
    # NOTE! This is not atomic, is subject to TOCTTOU, but the whole
    # dl-wrapper runs under an flock, so we're safe.
    if [ ! -e "${output}" -a -e "${old_dl_dir}/${filename}" ]; then
        ln "${old_dl_dir}/${filename}" "${output}" || \
        cp "${old_dl_dir}/${filename}" "${output}" || \
        true
    fi

    # If the output file already exists and:
    # - there's no .hash file: do not download it again and exit promptly
    # - matches all its hashes: do not download it again and exit promptly
    # - fails at least one of its hashes: force a re-download
    # - there's no hash (but a .hash file): consider it a hard error
    if [ -e "${output}" ]; then
        if support/download/check-hash ${quiet} "${hfile}" "${output}" "${output##*/}"; then
            exit 0
        elif [ ${?} -ne 2 ]; then
            # Do not remove the file, otherwise it might get re-downloaded
            # from a later location (i.e. primary -> upstream -> mirror).
            # Do not print a message, check-hash already did.
            exit 1
        fi
        rm -f "${output}"
        warn "Re-downloading '%s'...\n" "${output##*/}"
    fi

    # Look through all the uris that we were given to download the package
    # source
    download_and_check=0
    rc=1
    for uri in "${uris[@]}"; do
        # A URI looks like 'backend[|urlencode]+real-uri'; split off the
        # backend specification, and fall back to wget for unknown ones.
        backend_urlencode="${uri%%+*}"
        backend="${backend_urlencode%|*}"
        case "${backend}" in
            git|svn|cvs|bzr|file|scp|hg) ;;
            *) backend="wget" ;;
        esac
        uri=${uri#*+}

        # Extract the optional urlencode flag from the original backend
        # specification; ${backend} itself already had the '|...' suffix
        # stripped, so it must not be used here.
        urlencode=${backend_urlencode#*|}
        # urlencode must be "urlencode"
        [ "${urlencode}" != "urlencode" ] && urlencode=""

        # tmpd is a temporary directory in which backends may store
        # intermediate by-products of the download.
        # tmpf is the file in which the backends should put the downloaded
        # content.
        # tmpd is located in $(BUILD_DIR), so as not to clutter the (precious)
        # $(BR2_DL_DIR)
        # We let the backends create tmpf, so they are able to set whatever
        # permission bits they want (although we're only really interested in
        # the executable bit.)
        tmpd="$(mktemp -d "${BUILD_DIR}/.${output##*/}.XXXXXX")"
        tmpf="${tmpd}/output"

        # Helpers expect to run in a directory that is *really* trashable, so
        # they are free to create whatever files and/or sub-dirs they might need.
        # Doing the 'cd' here rather than in all backends is easier.
        cd "${tmpd}"

        # If the backend fails, we can just remove the content of the temporary
        # directory to remove all the cruft it may have left behind, and try
        # the next URI until it succeeds. Once out of URI to try, we need to
        # cleanup and exit.
        # The unquoted $(...) deliberately expands to nothing (no argument at
        # all) when urlencode is disabled, and to a single '-e' otherwise.
        if ! "${OLDPWD}/support/download/${backend}" \
                $([ -n "${urlencode}" ] && printf %s '-e') \
                -c "${cset}" \
                -d "${dl_dir}" \
                -n "${raw_base_name}" \
                -N "${base_name}" \
                -f "${filename}" \
                -u "${uri}" \
                -o "${tmpf}" \
                ${quiet} ${recurse} -- "${@}"
        then
            # cd back to keep path coherence
            cd "${OLDPWD}"
            rm -rf "${tmpd}"
            continue
        fi

        # cd back to free the temp-dir, so we can remove it later
        cd "${OLDPWD}"

        # Check if the downloaded file is sane, and matches the stored hashes
        # for that file
        if support/download/check-hash ${quiet} "${hfile}" "${tmpf}" "${output##*/}"; then
            rc=0
        else
            if [ ${?} -ne 3 ]; then
                rm -rf "${tmpd}"
                continue
            fi

            # the hash file exists and there was no hash to check the file
            # against
            rc=1
        fi
        download_and_check=1
        break
    done

    # We tried every URI possible, none seems to work or to check against the
    # available hash. *ABORT MISSION*
    if [ "${download_and_check}" -eq 0 ]; then
        rm -rf "${tmpd}"
        exit 1
    fi

    # tmp_output is in the same directory as the final output, so we can
    # later move it atomically.
    tmp_output="$(mktemp "${output}.XXXXXX")"

    # 'mktemp' creates files with 'go=-rwx', so the files are not accessible
    # to users other than the one doing the download (and root, of course).
    # This can be problematic when a shared BR2_DL_DIR is used by different
    # users (e.g. on a build server), where all users may write to the shared
    # location, since other users would not be allowed to read the files
    # another user downloaded.
    # So, we restore the 'go' access rights to a more sensible value, while
    # still abiding by the current user's umask. We must do that before the
    # final 'mv', so just do it now.
    # Some backends (cp and scp) may create executable files, so we need to
    # carry the executable bit if needed.
    [ -x "${tmpf}" ] && new_mode=755 || new_mode=644
    new_mode=$(printf "%04o" $((0${new_mode} & ~0$(umask))))
    chmod ${new_mode} "${tmp_output}"

    # We must *not* unlink tmp_output, otherwise there is a small window
    # during which another download process may create the same tmp_output
    # name (very, very unlikely; but not impossible.)
    # Using 'cp' is not reliable, since 'cp' may unlink the destination file
    # if it is unable to open it with O_WRONLY|O_TRUNC; see:
    #   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cp.html
    # Since the destination filesystem can be anything, it might not support
    # O_TRUNC, so 'cp' would unlink it first.
    # Use 'cat' and append-redirection '>>' to save to the final location,
    # since that is the only way we can be 100% sure of the behaviour.
    if ! cat "${tmpf}" >>"${tmp_output}"; then
        rm -rf "${tmpd}" "${tmp_output}"
        exit 1
    fi
    rm -rf "${tmpd}"

    # tmp_output and output are on the same filesystem, so POSIX guarantees
    # that 'mv' is atomic, because it then uses rename() that POSIX mandates
    # to be atomic, see:
    #   http://pubs.opengroup.org/onlinepubs/9699919799/functions/rename.html
    if ! mv -f "${tmp_output}" "${output}"; then
        rm -f "${tmp_output}"
        exit 1
    fi

    return ${rc}
}

# Print a printf-style message on stdout, prefixed with this script's name.
trace() {
    local fmt="${1}"
    shift
    printf "%s: ${fmt}" "${my_name}" "${@}"
}

# Same as trace(), but on stderr.
warn() {
    trace "${@}" >&2
}

# Print a warning, then exit with the given status code.
errorN() {
    local ret="${1}"
    shift
    warn "${@}"
    exit ${ret}
}

# Print a warning, then exit with status 1.
error() {
    errorN 1 "${@}"
}

# Keep only the basename of this script, for user-facing messages.
my_name="${0##*/}"
main "${@}"
OpenPOWER on IntegriCloud