aboutsummaryrefslogtreecommitdiff
path: root/license_protected_downloads/common.py
blob: a10f31869c3c567a9165edfe80ae376a3006e885 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import fnmatch
import os

import boto

from django.conf import settings
from django.core.cache import cache
from django.http import Http404

from license_protected_downloads import models
from license_protected_downloads.artifact import(
    LocalArtifact,
    S3Artifact,
)


def safe_path_join(base_path, *paths):
    """os.path.join with check that result is inside base_path.

    Checks that the generated path doesn't end up outside the target
    directory, so server accesses stay where we expect them.

    Returns the joined path, or None when the result would not live
    under base_path or is not already in normalised form (for example
    because it contains ".." or doubled slashes).
    """

    target_path = os.path.join(base_path, *paths)

    # An absolute component in *paths makes os.path.join discard base_path,
    # so the result must still begin with it.
    if not target_path.startswith(base_path):
        return None

    # A bare string-prefix test would also accept sibling directories such
    # as "/srv/data2" for a base of "/srv/data"; require that whatever
    # follows the base begins at a path separator.
    remainder = target_path[len(base_path):]
    if (base_path and remainder and not base_path.endswith("/")
            and not remainder.startswith("/")):
        return None

    # Anything normpath would rewrite ("..", ".", "//") is refused.
    if os.path.normpath(target_path) != target_path.rstrip("/"):
        return None

    return target_path


def cached_call(key, func, *args, **kwargs):
    """Return func(*args, **kwargs), reusing a previously cached result.

    The entry is looked up and stored under ``func.__name__ + key``, so the
    same key can be shared between different functions without clashing.
    The value is stored with cache.set() without an explicit timeout.

    A result of None is indistinguishable from a cache miss and is therefore
    recomputed on every call.
    """
    key = func.__name__ + key
    v = cache.get(key)
    # Compare against None rather than truthiness so that legitimately
    # falsy results (0, '', [], False) count as cache hits.
    if v is not None:
        return v
    v = func(*args, **kwargs)
    cache.set(key, v)
    return v


def _handle_wildcard(request, fullpath):
    path, name = os.path.split(fullpath)

    if not os.path.isdir(path):
        return None

    match = None
    for f in os.listdir(path):
        if fnmatch.fnmatch(f, name):
            if match:
                # change request.path so that the 404.html page can show
                # a descriptive error
                request.path = 'Multiple files match this expression'
                raise Http404
            match = os.path.join(path, f)
    return match


def _handle_s3_wildcard(request, bucket, prefix):
    prefix, base = os.path.split(prefix)
    if '*' in base or '?' in base:
        match = None
        prefix += '/'
        items = list(bucket.list(delimiter='/', prefix=prefix))
        for item in items:
            if fnmatch.fnmatch(os.path.basename(item.name), base):
                if match:
                    request.path = 'Multiple files match this expression'
                    raise Http404
                match = item
        if match:
            return S3Artifact(bucket, match, None, False)


def _find_served_paths(path, request):
    """Work out which base directories to search and the path inside them.

    Without a recognised "key" query parameter this returns
    settings.SERVED_PATHS and the path unchanged. With a key that exists in
    APIKeyStore, the path is nested under a directory named after the key,
    and for non-public keys the search is limited to settings.UPLOAD_PATH.

    Returns a (served_paths, path) tuple.
    """
    served_paths = settings.SERVED_PATHS

    if "key" not in request.GET:
        return served_paths, path

    key_details = models.APIKeyStore.objects.filter(key=request.GET["key"])
    if not key_details:
        # unknown key: behave as if none had been supplied
        return served_paths, path

    # a known key gives access to a per-key directory
    path = os.path.join(request.GET["key"], path)

    # Private uploads are in a separate path (or can be), so only look there.
    if not key_details[0].public:
        served_paths = [settings.UPLOAD_PATH]

    return served_paths, path


def _find_s3_artifact(request, path):
    """Look up path in the configured S3 bucket.

    Returns an S3Artifact for the key or "directory" prefix whose name
    matches exactly, otherwise whatever _handle_s3_wildcard() finds for a
    wildcard in the last component. Returns None when S3 is not configured
    or nothing matches.
    """
    b = S3Artifact.get_bucket()
    if not b:
        return None  # s3 isn't configured

    prefix = settings.S3_PREFIX_PATH + S3Artifact.pathname2url(path)
    # endswith() rather than prefix[-1] so an empty prefix does not raise
    # IndexError.
    if prefix.endswith('/'):
        # s3 listing give sub dir, we don't want that
        prefix = prefix[:-1]

    for item in b.list(delimiter='/', prefix=prefix):
        if isinstance(item, boto.s3.prefix.Prefix):
            # directory-like entries are reported with a trailing '/'
            if item.name == prefix + '/':
                return S3Artifact(b, item, None, False)
        elif item.name == prefix:
            return S3Artifact(b, item, None, False)
    return _handle_s3_wildcard(request, b, prefix)


def find_artifact(request, path):
    """Return an Artifact object representing a directory or file we serve.

    Each served base path is tried in turn: first for an exact file or
    directory, then for a single wildcard match in the last path component.
    If nothing is found locally the S3 bucket is consulted.

    Raises Http404 when no artifact matches (or, via the wildcard helpers,
    when a wildcard matches more than one entry).
    """
    served_paths, path = _find_served_paths(path, request)
    for basepath in served_paths:
        fullpath = safe_path_join(basepath, path)
        if fullpath is None:
            # the requested path is unsafe; stop looking on local disk
            break
        if os.path.isfile(fullpath) or os.path.isdir(fullpath):
            return LocalArtifact(None, '', path, False, basepath)

        fullpath = _handle_wildcard(request, fullpath)
        if fullpath:
            # Make the match relative to basepath again. Stripping the
            # separator explicitly (instead of skipping a fixed
            # len(basepath) + 1 characters) keeps the first character of the
            # name intact when basepath itself ends with '/'.
            path = fullpath[len(basepath):].lstrip('/')
            return LocalArtifact(None, '', path, False, basepath)

    r = _find_s3_artifact(request, path)
    if r:
        return r

    raise Http404


def _sort_artifacts(a, b):
    '''Ensures directory listings follow our ordering rules for artifacts.

    If the directory is all numbers it sorts them numerically. The "latest"
    entry will always be the first entry. Else use standard sorting.
    '''
    a = a.file_name
    b = b.file_name
    try:
        # we want listings of build numbers (integers) and releases (floats eg
        # "16.12" to listed in reverse order so they show newest to oldest
        return cmp(float(b), float(a))
    except:
        pass
    # always give preference to make "latest" show first
    if a == 'latest':
        return -1
    elif b == 'latest':
        return 1

    # just do a normal string sort
    return cmp(a, b)


def _s3_list(bucket, url):
    """Yield the entries directly under url in the S3 bucket.

    The listing prefix is settings.S3_PREFIX_PATH + url. The entry whose
    name equals the listing prefix itself is skipped.
    """
    prefix = settings.S3_PREFIX_PATH + url
    # An empty prefix means the bucket root and is listed as-is; indexing
    # prefix[-1] there would raise IndexError.
    if prefix and not prefix.endswith('/'):
        # s3 listing needs '/' to do a dir listing
        prefix += '/'

    for item in bucket.list(delimiter='/', prefix=prefix):
        if item.name != prefix:
            yield item


def dir_list(artifact, human_readable=True):
    """Build the listing for the directory represented by artifact.

    Children are gathered from local disk (when artifact is a LocalArtifact)
    and from the S3 bucket (when one is configured), sorted with
    _sort_artifacts, and returned as a list of each visible child's
    get_listing() result with duplicate names dropped.
    """
    base_url = artifact.url()

    entries = []
    if isinstance(artifact, LocalArtifact):
        local_dir = artifact.full_path
        for name in os.listdir(local_dir):
            entries.append(
                LocalArtifact(artifact, base_url, name, human_readable,
                              local_dir))

    bucket = S3Artifact.get_bucket()
    if bucket:
        # the S3 prefix is the url without its first character
        entries.extend(
            S3Artifact(bucket, item, artifact, human_readable)
            for item in _s3_list(bucket, base_url[1:]))

    entries.sort(_sort_artifacts)

    # s3 and local could return duplicate names. Because the entries are
    # sorted, duplicates sit next to each other, so only the first entry of
    # each run of equal names is kept. Local artifacts were added first and
    # therefore take precedence.
    listing = []
    previous_name = None
    for entry in entries:
        if previous_name != entry.file_name and not entry.hidden():
            listing.append(entry.get_listing())
        previous_name = entry.file_name

    return listing