Skip to content

Commit 881dbcd

Browse files
authored
fix: add a check for partial response data (#435)
* chore: update checksum mismatch error message with additional context * trigger retry if lib detects incomplete read * update condition and message * move check to before checksum mismatch errors
1 parent ef91e27 commit 881dbcd

File tree

4 files changed

+128
-21
lines changed

4 files changed

+128
-21
lines changed

packages/google-resumable-media/google/resumable_media/requests/download.py

Lines changed: 55 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@
4444
Please restart the download.
4545
"""
4646

47+
_RESPONSE_HEADERS_INFO = """\
48+
49+
The X-Goog-Stored-Content-Length is {}. The X-Goog-Stored-Content-Encoding is {}.
50+
51+
The download request read {} bytes of data.
52+
If the download was incomplete, please check the network connection and restart the download.
53+
"""
54+
4755

4856
class Download(_request_helpers.RequestsMixin, _download.Download):
4957
"""Helper to manage downloading a resource from a Google API.
@@ -131,14 +139,32 @@ def _write_to_stream(self, response):
131139
and response.status_code != http.client.PARTIAL_CONTENT
132140
):
133141
actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())
142+
134143
if actual_checksum != expected_checksum:
135-
msg = _CHECKSUM_MISMATCH.format(
136-
self.media_url,
137-
expected_checksum,
138-
actual_checksum,
139-
checksum_type=self.checksum.upper(),
144+
headers = self._get_headers(response)
145+
x_goog_encoding = headers.get("x-goog-stored-content-encoding")
146+
x_goog_length = headers.get("x-goog-stored-content-length")
147+
content_length_msg = _RESPONSE_HEADERS_INFO.format(
148+
x_goog_length, x_goog_encoding, self._bytes_downloaded
140149
)
141-
raise common.DataCorruption(response, msg)
150+
if (
151+
x_goog_length
152+
and self._bytes_downloaded < int(x_goog_length)
153+
and x_goog_encoding != "gzip"
154+
):
155+
# The library will attempt to trigger a retry by raising a ConnectionError, if
156+
# (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
157+
# (b) the object is not gzip-compressed when stored in Cloud Storage.
158+
raise ConnectionError(content_length_msg)
159+
else:
160+
msg = _CHECKSUM_MISMATCH.format(
161+
self.media_url,
162+
expected_checksum,
163+
actual_checksum,
164+
checksum_type=self.checksum.upper(),
165+
)
166+
msg += content_length_msg
167+
raise common.DataCorruption(response, msg)
142168

143169
def consume(
144170
self,
@@ -321,13 +347,30 @@ def _write_to_stream(self, response):
321347
actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())
322348

323349
if actual_checksum != expected_checksum:
324-
msg = _CHECKSUM_MISMATCH.format(
325-
self.media_url,
326-
expected_checksum,
327-
actual_checksum,
328-
checksum_type=self.checksum.upper(),
350+
headers = self._get_headers(response)
351+
x_goog_encoding = headers.get("x-goog-stored-content-encoding")
352+
x_goog_length = headers.get("x-goog-stored-content-length")
353+
content_length_msg = _RESPONSE_HEADERS_INFO.format(
354+
x_goog_length, x_goog_encoding, self._bytes_downloaded
329355
)
330-
raise common.DataCorruption(response, msg)
356+
if (
357+
x_goog_length
358+
and self._bytes_downloaded < int(x_goog_length)
359+
and x_goog_encoding != "gzip"
360+
):
361+
# The library will attempt to trigger a retry by raising a ConnectionError, if
362+
# (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
363+
# (b) the object is not gzip-compressed when stored in Cloud Storage.
364+
raise ConnectionError(content_length_msg)
365+
else:
366+
msg = _CHECKSUM_MISMATCH.format(
367+
self.media_url,
368+
expected_checksum,
369+
actual_checksum,
370+
checksum_type=self.checksum.upper(),
371+
)
372+
msg += content_length_msg
373+
raise common.DataCorruption(response, msg)
331374

332375
def consume(
333376
self,

packages/google-resumable-media/tests/system/requests/test_download.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ def test_corrupt_download(self, add_files, corrupting_transport, checksum):
460460
info[checksum],
461461
checksum_type=checksum.upper(),
462462
)
463-
assert exc_info.value.args == (msg,)
463+
assert msg in exc_info.value.args[0]
464464

465465
def test_corrupt_download_no_check(self, add_files, corrupting_transport):
466466
for info in ALL_FILES:

packages/google-resumable-media/tests/unit/requests/test_download.py

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,11 @@ def test__write_to_stream_with_hash_check_fail(self, checksum):
105105
msg = download_mod._CHECKSUM_MISMATCH.format(
106106
EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper()
107107
)
108-
assert error.args[0] == msg
108+
assert msg in error.args[0]
109+
assert (
110+
f"The download request read {download._bytes_downloaded} bytes of data."
111+
in error.args[0]
112+
)
109113

110114
# Check mocks.
111115
response.__enter__.assert_called_once_with()
@@ -167,6 +171,29 @@ def test__write_to_stream_with_invalid_checksum_type(self):
167171
error = exc_info.value
168172
assert error.args[0] == "checksum must be ``'md5'``, ``'crc32c'`` or ``None``"
169173

174+
@pytest.mark.parametrize("checksum", ["md5", "crc32c"])
175+
def test__write_to_stream_incomplete_read(self, checksum):
176+
stream = io.BytesIO()
177+
download = download_mod.Download(EXAMPLE_URL, stream=stream, checksum=checksum)
178+
179+
chunk1 = b"first chunk"
180+
mock_full_content_length = len(chunk1) + 123
181+
headers = {"x-goog-stored-content-length": mock_full_content_length}
182+
bad_checksum = "d3JvbmcgbiBtYWRlIHVwIQ=="
183+
header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum)
184+
headers[_helpers._HASH_HEADER] = header_value
185+
response = _mock_response(chunks=[chunk1], headers=headers)
186+
187+
with pytest.raises(ConnectionError) as exc_info:
188+
download._write_to_stream(response)
189+
190+
assert not download.finished
191+
error = exc_info.value
192+
assert (
193+
f"The download request read {download._bytes_downloaded} bytes of data."
194+
in error.args[0]
195+
)
196+
170197
def _consume_helper(
171198
self,
172199
stream=None,
@@ -285,7 +312,11 @@ def test_consume_with_stream_hash_check_fail(self, checksum):
285312
msg = download_mod._CHECKSUM_MISMATCH.format(
286313
EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper()
287314
)
288-
assert error.args[0] == msg
315+
assert msg in error.args[0]
316+
assert (
317+
f"The download request read {download._bytes_downloaded} bytes of data."
318+
in error.args[0]
319+
)
289320

290321
# Check mocks.
291322
transport.request.assert_called_once_with(
@@ -544,7 +575,11 @@ def test__write_to_stream_with_hash_check_fail(self, checksum):
544575
msg = download_mod._CHECKSUM_MISMATCH.format(
545576
EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper()
546577
)
547-
assert error.args[0] == msg
578+
assert msg in error.args[0]
579+
assert (
580+
f"The download request read {download._bytes_downloaded} bytes of data."
581+
in error.args[0]
582+
)
548583

549584
# Check mocks.
550585
response.__enter__.assert_called_once_with()
@@ -577,6 +612,31 @@ def test__write_to_stream_with_invalid_checksum_type(self):
577612
error = exc_info.value
578613
assert error.args[0] == "checksum must be ``'md5'``, ``'crc32c'`` or ``None``"
579614

615+
@pytest.mark.parametrize("checksum", ["md5", "crc32c"])
616+
def test__write_to_stream_incomplete_read(self, checksum):
617+
stream = io.BytesIO()
618+
download = download_mod.RawDownload(
619+
EXAMPLE_URL, stream=stream, checksum=checksum
620+
)
621+
622+
chunk1 = b"first chunk"
623+
mock_full_content_length = len(chunk1) + 123
624+
headers = {"x-goog-stored-content-length": mock_full_content_length}
625+
bad_checksum = "d3JvbmcgbiBtYWRlIHVwIQ=="
626+
header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum)
627+
headers[_helpers._HASH_HEADER] = header_value
628+
response = _mock_raw_response(chunks=[chunk1], headers=headers)
629+
630+
with pytest.raises(ConnectionError) as exc_info:
631+
download._write_to_stream(response)
632+
633+
assert not download.finished
634+
error = exc_info.value
635+
assert (
636+
f"The download request read {download._bytes_downloaded} bytes of data."
637+
in error.args[0]
638+
)
639+
580640
def _consume_helper(
581641
self,
582642
stream=None,
@@ -699,7 +759,11 @@ def test_consume_with_stream_hash_check_fail(self, checksum):
699759
msg = download_mod._CHECKSUM_MISMATCH.format(
700760
EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper()
701761
)
702-
assert error.args[0] == msg
762+
assert msg in error.args[0]
763+
assert (
764+
f"The download request read {download._bytes_downloaded} bytes of data."
765+
in error.args[0]
766+
)
703767

704768
# Check mocks.
705769
transport.request.assert_called_once_with(

packages/google-resumable-media/tests_async/unit/requests/test_download.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ async def test__write_to_stream_with_hash_check_fail(self, checksum):
9898
good_checksum,
9999
checksum_type=checksum.upper(),
100100
)
101-
assert error.args[0] == msg
101+
assert msg in error.args[0]
102102

103103
@pytest.mark.asyncio
104104
@pytest.mark.parametrize("checksum", ["md5", "crc32c"])
@@ -263,7 +263,7 @@ async def test_consume_with_stream_hash_check_fail(self, checksum):
263263
good_checksum,
264264
checksum_type=checksum.upper(),
265265
)
266-
assert error.args[0] == msg
266+
assert msg in error.args[0]
267267

268268
# Check mocks.
269269
transport.request.assert_called_once_with(
@@ -353,7 +353,7 @@ async def test__write_to_stream_with_hash_check_fail(self, checksum):
353353
good_checksum,
354354
checksum_type=checksum.upper(),
355355
)
356-
assert error.args[0] == msg
356+
assert msg in error.args[0]
357357

358358
@pytest.mark.asyncio
359359
async def test__write_to_stream_with_invalid_checksum_type(self):
@@ -482,7 +482,7 @@ async def test_consume_with_stream_hash_check_fail(self, checksum):
482482
good_checksum,
483483
checksum_type=checksum.upper(),
484484
)
485-
assert error.args[0] == msg
485+
assert msg in error.args[0]
486486

487487
# Check mocks.
488488
transport.request.assert_called_once_with(

0 commit comments

Comments
 (0)