sb: Change urllib to urllib2 on Python 2 and add user agent support.

Some sites block the standard Python user agent. Change it to the Wget
user agent, as that seems to make the tested sites behave.

Limit the size of long URLs when logging to the user.

Closes #2656.
This commit is contained in:
Chris Johns 2016-03-17 16:47:31 +11:00
parent 0e22c3c7ff
commit 31f33047a2

View File

@ -32,7 +32,7 @@ try:
import urllib.request as urllib_request import urllib.request as urllib_request
import urllib.parse as urllib_parse import urllib.parse as urllib_parse
except ImportError: except ImportError:
import urllib as urllib_request import urllib2 as urllib_request
import urlparse as urllib_parse import urlparse as urllib_parse
import cvs import cvs
@ -69,6 +69,13 @@ def _humanize_bytes(bytes, precision = 1):
break break
return '%.*f%s' % (precision, float(bytes) / factor, suffix) return '%.*f%s' % (precision, float(bytes) / factor, suffix)
def _sensible_url(url, used = 0):
space = 150 - used - 15
if len(url) > space:
size = (space - 5) / 2
url = url[:size] + ' ... ' + url[-size:]
return url
def _hash_check(file_, absfile, macros, remove = True): def _hash_check(file_, absfile, macros, remove = True):
failed = False failed = False
hash = sources.get_hash(file_.lower(), macros) hash = sources.get_hash(file_.lower(), macros)
@ -325,7 +332,8 @@ def _http_downloader(url, local, config, opts):
if url.startswith('https://api.github.com'): if url.startswith('https://api.github.com'):
url = urllib_parse.urljoin(url, config.expand('tarball/%{version}')) url = urllib_parse.urljoin(url, config.expand('tarball/%{version}'))
dst = os.path.relpath(path.host(local)) dst = os.path.relpath(path.host(local))
log.notice('download: %s -> %s' % (url, dst)) log.output('download: %s -> %s' % (url, dst))
log.notice('download: %s -> %s' % (_sensible_url(url, len(dst)), dst))
failed = False failed = False
if _do_download(opts): if _do_download(opts):
_in = None _in = None
@ -337,24 +345,29 @@ def _http_downloader(url, local, config, opts):
_last_percent = 200.0 _last_percent = 200.0
_last_msg = '' _last_msg = ''
_have_status_output = False _have_status_output = False
_url = url
try: try:
try: try:
_in = None _in = None
_ssl_context = None _ssl_context = None
_urllib_url = url # See #2656
_req = urllib_request.Request(_url)
_req.add_header('User-Agent', 'Wget/1.16.3 (freebsd10.1)')
try: try:
import ssl import ssl
_ssl_context = ssl._create_unverified_context() _ssl_context = ssl._create_unverified_context()
_in = urllib_request.urlopen(_urllib_url, context = _ssl_context) _in = urllib_request.urlopen(_req, context = _ssl_context)
except: except:
_ssl_context = None _ssl_context = None
if _ssl_context is None: if _ssl_context is None:
_in = urllib_request.urlopen(_urllib_url) _in = urllib_request.urlopen(_req)
if url != _in.geturl(): if _url != _in.geturl():
log.notice(' redirect: %s' % (_in.geturl())) _url = _in.geturl()
log.output(' redirect: %s' % (_url))
log.notice(' redirect: %s' % (_sensible_url(_url)))
_out = open(path.host(local), 'wb') _out = open(path.host(local), 'wb')
try: try:
_length = int(_in.info().getheader('Content-Length').strip()) _length = int(_in.info()['Content-Length'].strip())
except: except:
pass pass
while True: while True:
@ -379,17 +392,17 @@ def _http_downloader(url, local, config, opts):
log.stdout_raw('\n\r') log.stdout_raw('\n\r')
raise raise
except IOError as err: except IOError as err:
log.notice('download: %s: error: %s' % (url, str(err))) log.notice('download: %s: error: %s' % (_sensible_url(_url), str(err)))
if path.exists(local): if path.exists(local):
os.remove(path.host(local)) os.remove(path.host(local))
failed = True failed = True
except ValueError as err: except ValueError as err:
log.notice('download: %s: error: %s' % (url, str(err))) log.notice('download: %s: error: %s' % (_sensible_url(_url), str(err)))
if path.exists(local): if path.exists(local):
os.remove(path.host(local)) os.remove(path.host(local))
failed = True failed = True
except: except:
msg = 'download: %s: error' % (url) msg = 'download: %s: error' % (_sensible_url(_url))
log.stderr(msg) log.stderr(msg)
log.notice(msg) log.notice(msg)
if _in is not None: if _in is not None: