From ad63729adb1cf04fdf954e3c51d98dc796b5faf0 Mon Sep 17 00:00:00 2001 From: Volodymyr Kovalenko Date: Mon, 11 Oct 2021 12:19:34 +0300 Subject: [PATCH 1/3] fix s3 storage --- Dockerfile | 2 +- requirements.txt | 2 +- .../document_service/storages/s3_storage.py | 48 +++---- src/prozorro_sale/document_service/utils.py | 2 +- test-requirements.txt | 3 +- tests/unit/helpers.py | 27 ++++ tests/unit/test_s3_storage.py | 136 ++++++++++++++++++ tests/unit/test_storage.py | 36 +---- 8 files changed, 197 insertions(+), 59 deletions(-) create mode 100644 tests/unit/helpers.py create mode 100644 tests/unit/test_s3_storage.py diff --git a/Dockerfile b/Dockerfile index 8009746..5ceff04 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3-slim as base +FROM python:3.9-slim as base WORKDIR /document_service ENV PYTHONUNBUFFERED True diff --git a/requirements.txt b/requirements.txt index b0d793a..dc6f022 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -aiobotocore==1.2.1 +aiobotocore==1.4.2 pyjwt~=2.0.0 cryptography==3.4.4 aiohttp==3.7.3 diff --git a/src/prozorro_sale/document_service/storages/s3_storage.py b/src/prozorro_sale/document_service/storages/s3_storage.py index e943711..1e3e76d 100644 --- a/src/prozorro_sale/document_service/storages/s3_storage.py +++ b/src/prozorro_sale/document_service/storages/s3_storage.py @@ -11,19 +11,15 @@ from botocore.exceptions import ClientError from prozorro_sale.document_service.storages.base_storage import BaseStorage, DATETIME_FORMAT from prozorro_sale.document_service.errors import FileNotFound, KeyNotFound, HeaderNotExists -CLIENT = None - def get_client(): - global CLIENT - if not CLIENT: - CLIENT = aiobotocore.get_session().create_client( - 's3', - region_name=os.environ['BUCKET_HOST'], - aws_secret_access_key=os.environ['BUCKET_SECRET_KEY'], - aws_access_key_id=os.environ['BUCKET_ACCESS_KEY'] - ) - return CLIENT + client = aiobotocore.get_session().create_client( + 's3', + region_name=os.environ['BUCKET_HOST'], + aws_secret_access_key=os.environ['BUCKET_SECRET_KEY'], + aws_access_key_id=os.environ['BUCKET_ACCESS_KEY'] + ) + return client class S3Storage(BaseStorage): @@ -56,15 +52,15 @@ class S3Storage(BaseStorage): raise FileNotFound content_type = data['Content-Type'] - client = get_client() - await client.put_object( - Bucket=self.bucket, - Key=f"{scope}/{uuid}", - Body=data.pop('body'), - ContentDisposition=data.pop('Content-Disposition'), - ContentType=data.pop('Content-Type'), - Metadata=data - ) + async with get_client() as client: + await client.put_object( + Bucket=self.bucket, + Key=f"{scope}/{uuid}", + Body=data.pop('body'), + ContentDisposition=data.pop('Content-Disposition'), + ContentType=data.pop('Content-Type'), + Metadata=data + ) data['id'] = uuid return { 'id': uuid, @@ -78,12 +74,14 @@ class S3Storage(BaseStorage): } async def get(self, uuid, scope, request): - client = get_client() try: - response = await client.get_object(Bucket=self.bucket, Key=f"{scope}/{uuid}") + async with get_client() as client: + response = await client.get_object(Bucket=self.bucket, Key=f"{scope}/{uuid}") except ClientError: raise KeyNotFound(uuid) - content = await response['Body'].read() + + async with response['Body'] as stream: + content = await stream.read() response_headers = response['ResponseMetadata']['HTTPHeaders'] return web.Response( @@ -96,9 +94,9 @@ class S3Storage(BaseStorage): ) async def get_metadata(self, uuid, scope): - client = get_client() try: - response = await client.head_object(Bucket=self.bucket, Key=f"{scope}/{uuid}") + async with get_client() as client: + response = await client.head_object(Bucket=self.bucket, Key=f"{scope}/{uuid}") except ClientError: raise KeyNotFound(uuid) diff --git a/src/prozorro_sale/document_service/utils.py b/src/prozorro_sale/document_service/utils.py index d1eab19..3d571c0 100644 --- a/src/prozorro_sale/document_service/utils.py +++ b/src/prozorro_sale/document_service/utils.py @@ -36,7 +36,7 @@ async def excepts_errors_middleware(request, handler): return web.json_response(data={'error': 'No token provided'}, status=403) except errors.KeyNotFound as ex: LOG.error(str(ex)) - return Response(status=404) + return web.json_response(data={'error': f'The specified key does not found: {ex}'}, status=404) except asyncio.TimeoutError: LOG.exception('Got client session timeout exception') return web.json_response( diff --git a/test-requirements.txt b/test-requirements.txt index f133f91..4b474b6 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,3 +1,4 @@ coverage nose -aiounittest \ No newline at end of file +aiounittest +moto==2.2.8 \ No newline at end of file diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py new file mode 100644 index 0000000..1d1c826 --- /dev/null +++ b/tests/unit/helpers.py @@ -0,0 +1,27 @@ +from unittest.mock import MagicMock + + +async def get_text(): + return 'test' + + +async def get_file_data(): + return b'test_data' + + +def get_mocked_data(): + main_mock = MagicMock() + mock_doc_type = MagicMock() + mock_file = MagicMock() + + mock_doc_type.name = 'documentType' + mock_doc_type.text = get_text + + mock_file.name = 'file' + mock_file.headers = { + 'Content-Type': 'plain/text', + 'Content-Disposition': 'form-data; name="file"; filename="foo.txt"' + } + mock_file.read = get_file_data + main_mock.__aiter__.return_value = [mock_doc_type, mock_file] + return main_mock diff --git a/tests/unit/test_s3_storage.py b/tests/unit/test_s3_storage.py new file mode 100644 index 0000000..f4313d4 --- /dev/null +++ b/tests/unit/test_s3_storage.py @@ -0,0 +1,136 @@ +import os +import boto3 +import io +import aiobotocore + +from aiounittest import AsyncTestCase +from prozorro_sale.document_service.storages.s3_storage import S3Storage +from prozorro_sale.document_service.errors import KeyNotFound +from moto import mock_s3 +from botocore import awsrequest +from botocore.stub import Stubber +from unittest.mock import patch +from aiobotocore.response import StreamingBody +from tests.unit.helpers import get_mocked_data, get_file_data + + +class MonkeyPatchedAWSResponse(awsrequest.AWSResponse): + raw_headers = {} + + async def read(self): + return self.text + + +awsrequest.AWSResponse = MonkeyPatchedAWSResponse +# monkey-patch put_object solution - AttributeError: 'AWSResponse' object has no attribute 'raw_headers' + + +class RawStream(io.BytesIO): + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + pass + + async def read(self): + return super().read() + + +class S3StorageTest(AsyncTestCase): + + mock_s3 = mock_s3() + bucket_name = 'test-bucket' + + def create_bucket(self): + s3_resource = boto3.resource('s3', region_name='us-east-1') + s3_resource.create_bucket(Bucket=self.bucket_name) + return s3_resource + + def setUp(self) -> None: + self.mock_s3.start() + self.s3_storage = S3Storage(self.bucket_name) + os.environ['BUCKET_HOST'] = 'us-east-1' + os.environ['BUCKET_SECRET_KEY'] = 'testing' + os.environ['BUCKET_ACCESS_KEY'] = 'testing' + self.s3_bucket = self.create_bucket() + self.aiobotocore_session = aiobotocore.get_session() + + def tearDown(self): + self.mock_s3.stop() + + async def test_upload_object(self): + _file = get_mocked_data() + res = await self.s3_storage.upload(post_file=_file, scope='public', doc_type='') + self.assertIn('id', res) + self.assertEqual('public', res['scope']) + self.assertEqual('foo.txt', res['filename']) + self.assertEqual('test', res['documentType']) + self.assertEqual('plain/text', res['format']) + self.assertIn('sha', res) + self.assertIn('hash', res) + self.assertIn('dateCreated', res) + + with self.assertRaises(KeyNotFound): + await self.s3_storage.get(uuid='unknown_key', scope='public', request=None) + + actual_res = self.s3_bucket.Object(self.bucket_name, f"public/{res['id']}").get()['Body'].read() + expected_res = await get_file_data() + self.assertEqual(actual_res, expected_res) + + async def test_get_object(self): + + uuid = 'a'*32 + scope = 'public' + body_content = b"s3 some test binary data" + async with self.aiobotocore_session.create_client('s3', region_name='us-east-1') as client: + with Stubber(client) as stubber: + stubber.add_response( + "get_object", + { + "Body": StreamingBody( + raw_stream=RawStream(body_content), content_length=128 + ), + "ContentLength": 128, + "ResponseMetadata": { + "HTTPHeaders": { + "content-disposition": "test-content-disposition", + "content-type": "test-content-type" + } + } + }, + expected_params={"Bucket": self.bucket_name, "Key": f"{scope}/{uuid}"} + ) + with patch('prozorro_sale.document_service.storages.s3_storage.get_client', + return_value=client) as mock_get_client: + response = await self.s3_storage.get(uuid=uuid, scope=scope, request=None) + self.assertEquals(response.status, 200) + self.assertEquals(response.body, body_content) + mock_get_client.assert_called_once() + + async def test_head_object(self): + uuid = 'b'*32 + scope = 'public' + async with self.aiobotocore_session.create_client('s3', region_name='us-east-1') as client: + with Stubber(client) as stubber: + stubber.add_response( + "head_object", + { + "Metadata": { + "scope": "test_scope", + "documenttype": "test_documenttype", + "ContentType": "test_ContentType", + "sha": "test_sha", + "datecreated": "test_datecreated", + "hash": "test_hash" + } + }, + expected_params={"Bucket": self.bucket_name, "Key": f"{scope}/{uuid}"} + ) + with patch('prozorro_sale.document_service.storages.s3_storage.get_client', + return_value=client) as mock_get_client: + response = await self.s3_storage.get_metadata(uuid=uuid, scope=scope) + self.assertEquals(response.status, 200) + self.assertEquals(response.headers["ETag"], "test_hash") + self.assertEquals(response.headers["X-Date-Created"], "test_datecreated") + mock_get_client.assert_called_once() diff --git a/tests/unit/test_storage.py b/tests/unit/test_storage.py index 5620211..74b87c1 100644 --- a/tests/unit/test_storage.py +++ b/tests/unit/test_storage.py @@ -4,45 +4,21 @@ from aiounittest import AsyncTestCase from prozorro_sale.document_service.errors import KeyNotFound from prozorro_sale.document_service.storages.memory_storage import MemoryStorage +from tests.unit.helpers import get_mocked_data -async def get_text(): - return 'test' - - -async def get_file_data(): - return b'test' - - -class StorageTest(AsyncTestCase): +class MemoryStorageTest(AsyncTestCase): def setUp(self) -> None: self.storage_obj = MemoryStorage('') self.doc_type = 'illustration' - def get_mocked_data(self): - main_mock = MagicMock() - mock_doc_type = MagicMock() - mock_file = MagicMock() - - mock_doc_type.name = 'documentType' - mock_doc_type.text = get_text - - mock_file.name = 'file' - mock_file.headers = { - 'Content-Disposition': 'form-data; name="file"; filename="coverage-report-25e0d270.zip"', - 'Content-Type': 'text/plain', - } - mock_file.read = get_file_data - main_mock.__aiter__.return_value = [mock_doc_type, mock_file] - return main_mock - async def test_get_undefined_key(self): with self.assertRaises(KeyNotFound): await self.storage_obj.get(uuid=uuid4().hex, scope='private') async def test_upload(self): - post_file = self.get_mocked_data() + post_file = get_mocked_data() data = await self.storage_obj.upload(post_file, 'private', self.doc_type) self.assertIn('id', data) self.assertIn('scope', data) @@ -53,15 +29,15 @@ class StorageTest(AsyncTestCase): self.assertIn('dateCreated', data) async def test_get_file(self): - post_file = self.get_mocked_data() + post_file = get_mocked_data() upload_data = await self.storage_obj.upload(post_file, 'private', self.doc_type) response = await self.storage_obj.get(upload_data['id'], 'private') - self.assertEqual(response.body, b'test') + self.assertEqual(response.body, b'test_data') async def test_get_file_metadata(self): - post_file = self.get_mocked_data() + post_file = get_mocked_data() upload_data = await self.storage_obj.upload(post_file, 'private', self.doc_type) data = await self.storage_obj.get_metadata(upload_data['id'], 'private') self.assertIn('X-Scope', data.headers) -- GitLab From 1058f7dcf40f254748978bcf3da011df77548c04 Mon Sep 17 00:00:00 2001 From: Volodymyr Kovalenko Date: Wed, 6 Oct 2021 10:45:57 +0300 Subject: [PATCH 2/3] validate uploaded filetypes --- Dockerfile | 2 + fixtures/1.pdf | 198 ++++++++++++++++++ fixtures/2.gif | Bin 0 -> 18135 bytes requirements.txt | 1 + src/prozorro_sale/document_service/errors.py | 4 + .../document_service/settings.py | 13 ++ .../document_service/storages/base_storage.py | 21 +- .../storages/memory_storage.py | 5 +- .../document_service/storages/s3_storage.py | 7 +- .../storages/swift_storage.py | 8 +- src/prozorro_sale/document_service/utils.py | 6 + test-requirements.txt | 3 +- tests/integration/test_api.py | 75 ++++++- 13 files changed, 332 insertions(+), 11 deletions(-) create mode 100644 fixtures/1.pdf create mode 100644 fixtures/2.gif create mode 100644 src/prozorro_sale/document_service/settings.py diff --git a/Dockerfile b/Dockerfile index 5ceff04..56cc4df 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,7 @@ FROM python:3.9-slim as base WORKDIR /document_service ENV PYTHONUNBUFFERED True +RUN apt-get update && apt-get install -y --no-install-recommends libmagic1 COPY requirements.txt . RUN pip install -i https://pypi-int.prozorro.sale/ -r requirements.txt COPY secrets /secrets @@ -20,6 +21,7 @@ RUN echo $version && sed -i "s/##VERSION##/$version/g" prozorro_sale/__init__.py FROM test_base as test COPY src/ . COPY tests ./tests +COPY fixtures /fixtures ARG version=unknown RUN echo $version && sed -i "s/##VERSION##/$version-dev/g" prozorro_sale/__init__.py diff --git a/fixtures/1.pdf b/fixtures/1.pdf new file mode 100644 index 0000000..dbf091d --- /dev/null +++ b/fixtures/1.pdf @@ -0,0 +1,198 @@ +%PDF-1.3 +%âãÏÓ + +1 0 obj +<< +/Type /Catalog +/Outlines 2 0 R +/Pages 3 0 R +>> +endobj + +2 0 obj +<< +/Type /Outlines +/Count 0 +>> +endobj + +3 0 obj +<< +/Type /Pages +/Count 2 +/Kids [ 4 0 R 6 0 R ] +>> +endobj + +4 0 obj +<< +/Type /Page +/Parent 3 0 R +/Resources << +/Font << +/F1 9 0 R +>> +/ProcSet 8 0 R +>> +/MediaBox [0 0 612.0000 792.0000] +/Contents 5 0 R +>> +endobj + +5 0 obj +<< /Length 1074 >> +stream +2 J +BT +0 0 0 rg +/F1 0027 Tf +57.3750 722.2800 Td +( A Simple PDF File ) Tj +ET +BT +/F1 0010 Tf +69.2500 688.6080 Td +( This is a small demonstration .pdf file - ) Tj +ET +BT +/F1 0010 Tf +69.2500 664.7040 Td +( just for use in the Virtual Mechanics tutorials. More text. And more ) Tj +ET +BT +/F1 0010 Tf +69.2500 652.7520 Td +( text. And more text. And more text. And more text. ) Tj +ET +BT +/F1 0010 Tf +69.2500 628.8480 Td +( And more text. And more text. And more text. And more text. And more ) Tj +ET +BT +/F1 0010 Tf +69.2500 616.8960 Td +( text. And more text. Boring, zzzzz. And more text. And more text. And ) Tj +ET +BT +/F1 0010 Tf +69.2500 604.9440 Td +( more text. And more text. And more text. And more text. And more text. ) Tj +ET +BT +/F1 0010 Tf +69.2500 592.9920 Td +( And more text. And more text. ) Tj +ET +BT +/F1 0010 Tf +69.2500 569.0880 Td +( And more text. And more text. And more text. And more text. And more ) Tj +ET +BT +/F1 0010 Tf +69.2500 557.1360 Td +( text. And more text. And more text. Even more. Continued on page 2 ...) Tj +ET +endstream +endobj + +6 0 obj +<< +/Type /Page +/Parent 3 0 R +/Resources << +/Font << +/F1 9 0 R +>> +/ProcSet 8 0 R +>> +/MediaBox [0 0 612.0000 792.0000] +/Contents 7 0 R +>> +endobj + +7 0 obj +<< /Length 676 >> +stream +2 J +BT +0 0 0 rg +/F1 0027 Tf +57.3750 722.2800 Td +( Simple PDF File 2 ) Tj +ET +BT +/F1 0010 Tf +69.2500 688.6080 Td +( ...continued from page 1. Yet more text. And more text. And more text. ) Tj +ET +BT +/F1 0010 Tf +69.2500 676.6560 Td +( And more text. And more text. And more text. And more text. And more ) Tj +ET +BT +/F1 0010 Tf +69.2500 664.7040 Td +( text. Oh, how boring typing this stuff. But not as boring as watching ) Tj +ET +BT +/F1 0010 Tf +69.2500 652.7520 Td +( paint dry. And more text. And more text. And more text. And more text. ) Tj +ET +BT +/F1 0010 Tf +69.2500 640.8000 Td +( Boring. More, a little more text. The end, and just as well. ) Tj +ET +endstream +endobj + +8 0 obj +[/PDF /Text] +endobj + +9 0 obj +<< +/Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /Helvetica +/Encoding /WinAnsiEncoding +>> +endobj + +10 0 obj +<< +/Creator (Rave \(http://www.nevrona.com/rave\)) +/Producer (Nevrona Designs) +/CreationDate (D:20060301072826) +>> +endobj + +xref +0 11 +0000000000 65535 f +0000000019 00000 n +0000000093 00000 n +0000000147 00000 n +0000000222 00000 n +0000000390 00000 n +0000001522 00000 n +0000001690 00000 n +0000002423 00000 n +0000002456 00000 n +0000002574 00000 n + +trailer +<< +/Size 11 +/Root 1 0 R +/Info 10 0 R +>> + +startxref +2714 +%%EOF diff --git a/fixtures/2.gif b/fixtures/2.gif new file mode 100644 index 0000000000000000000000000000000000000000..c37dbcf820b55f5c800f7077124d9c4625cb1ee5 GIT binary patch literal 18135 zcmeHvWl$X5)^6iYaEBlR13>}=mxRFwx4|Je3^2Gu1a}+U-3GUT7(o*}IDvr>2u>0R z0g^zHxxDApx#zp}-uv%6Rk!L(|C;KjdiS%|+Ix4ee%2ZdsJe`-{UIO^#AAVf954Vt z0RR>N0RRXFfKUJk3qSz?3Im`h0Ez_=0Dyo22o!+80!RQr!T=-+Kw<$j0H9$28U>)S z00sasFaU!BFxcyn04xl^q5v%RuOR^#2ta`VEC>XEKrj#p1p;9~PyhslfuJZ56bnKC zAOsAAK!Ffg5E1|(VIU+5gv5f-00<2Op-~_-7K8yn7#IkH0%5S%>_Au;2#W$?v47D7 zU{C-B1+Y*M00qIIAQTjYg+c)+6b6N&pinFn0YDKjC;|mVV4+9=iiAOtC@2yOMFUVY z42niU(O4)3fMQ@!3<`?DURw;s!k}0b6pQ`KbO43`PzV5v009UP3;{wRKv)D6KtN## zC<+0^A`k!q0Ye~A2m}^^1Q19V0*OK(u?RGPK*JDd6atM!U;qRLhQOc@80@td5m*=k zi$Y+rf4LKYApsN;z#>5a5(GnnP)HCK2?dZ)7!ry?La|5$fJDHM2ow^5MIr$t5{5*g zkVq^N4It4lBpQW8W04pDiGd+8C?p1Z9T_ARhQy+fSnOXR0$^wWg$A%_5P$~3&>$2V zghfLEG!%x0qR>z*8UdgYFf;;%MqtrM0F8v9ktj40i$()zGz^VKq0v}020&wAXbcLC z!CprjjfJ7HC^Q!PSJ(j<20&o|ECvK%Krjplg#lqPPyhpkVW21s6pKLs7z7N1Kw%JA z3=+T~VHhL|gT!Lc00s@ipivk!7J~sW7#IeF!eFr1GYEr)VX!C+7W>y!0$^AGbv>)D zCleL~!-7!PGvGS#SSSn&MO{bhI!IUq42wWrd-mF)SR@RKL|uE~+ITD)hDD>U8DA5{ zVqjPd>U#9+L9tjE7K^(6#f2r)Q!_DEff%TXi-_Xk?BfAkKk!MMuGfXoUu)y9WkN*! z6Tl&6(`+jrh$g1JWj)$fF%(b5qgAZgUOAG?bjN*dw7u$eI;Tn!8?>W(EQ{Y5Wj)qW zGm$6iI9?3xteq;7@jqM}>#TcIrW8%euGLjPTcw$G%VyPY4n(a}#n&x)keYyFPNcbR zneD#RWSyv|EVH!9R+QDJOd%-oX}QUnl`^5xXD+hG_1@}bSk$K(YIB1A2Qtu;q1FIJ zLJrZvUAsVR#dzJ8$JepgUVc>^Eg~`{D&Y|uY*;ky&K+nl zXHFq=CiE?Ey^cqfJXB)L)VryJ(WsjxLUZv=^PVW9o&wJz^#FKE?6*}faorVNNs^mh z@iW!sz8g>JQ@XqNW3x2VeGUUC-{Z}Yh;VFi2>!bCfzD-E5m9Muu$eztrh>sJB*v&x zNWRfOJQRy#_d2HF-*|LEp;V#zTv%em40n6aE{uRgEFV(MUaqa|zL*2^!;ihNkNwTH z;PB8Jcx7t~=e;0wAV}qm2?fgQC{%9kj&hrdsAn^TXYzKLA-xkr9xI1)#XDtw+kZ&nclDB}ibyx(ER?Az zIs0?dT}H|pk^>K2;_zw`1tU~hOgs|(skHd4gA%{zq*+WQ&9o0u_F6uKmzXy}q)#nx zr3F%UF{PS^)$c<362q*+^&Mh4Rq9CWw~CV=)QVJX4n9L<1~~IF1tbi*2|gQdu$(dL zrqAAeQoBo#PD!hf=`pu; zWdDpm+(YomzDu`7>Q&2OYpX2YVQt6dIk{5LOP4?;AD#5QfcZGwa*MgB&7TI1HX4p>rKYCLgRdKhky zC8BL}dp%+KS9`_ODw`KtV1t)aJ^5U1#>wXP_oeLymzPej*diRh2DvMD@T~P&W3Q`< zj@$;m@058`|AeQUaxSyd&u+fh@0ybh7)#S=uq!2X^+I^5{3kceZ}~BC(ia&Q3TDUD zahZ_9<+p+0lpn_rEZVJquGO_hs=d)S5od>!&zH4|Z5!hqFyTqlwD$_MJ}S)`CYm>U znmFTT{lM|YC1sWvL+-eEFv%JrPFq`awdfW`vBX&F9yc*~ zO?D+Wj$PSZ;U?8sCv(dP=LThm=H!UHsG@LFl4jrBJ3RfX6r%0>YNbD>lgw9YKTuNX z3XOSMHR`ph$t=EI?+#c zI^C%zby|xk%X5_Z6w0(`e6c0$ywQ%L(fqa6Qb*x=lbh}l3=j7o+coZgbh~yNfqL^X z+l+Tr2v=vRljqCTH7qvbm+7s`*%2?bnt^@UPY9c)HHZUg&btIRp{YeI zn}?%g@jY}DMdH?*S435|>fgiqL!$kef)($zJrC`9x}o_^aZO_3&|?uha?|#%U!2Vs z4XWTO5;v;I8D^8>$WOYA%RK&XrOji4M|xO|XsKm@5CUH330D(gI{Ikf@u+PGWnb`& zIgjZ6nm>^u?;>c0K7h-eaiy}#HT^7?&dc~0@ng8jVuC@KH8aWMc))ERraZTpux$!* zsVVc7TFS`{2UAwVB1yO;2dU~H#9V5I6kN<{L>)N}$z(RcrNWh&%+!$s0t-b-#zxRm zCj(MK>SRG&4O?F}77#e;jR3D>1~;jNgV|-O>js=Mh@{_xP-l2%3t3Cw*HE*Et4DUH-G@Lh9Ztz=bmJaaEgF+rp`eas@g$FlLyl}e8A)=U;Y z1eJ|zQ|iRio+e>DQJJL|}9Em*L*PiZzu^<+mUbjTXxIZQ`$$mH~MK7<5AF&y!ofBZzNhLNMdxbm3NTycb zH_PAPedN%=Fs6LT;O)S=vtbg0MJn;Z;L!4WFQ}myG4^i5-DR7j9A@&JV+pqjoL+tS(9wrW zx*VlXlBX&6@p;KZgP5wnkL2UZuDHG8tvj3~3YbbVPom*fS*z}K)2EcR2;v_Td}Ak& z*V9FL!#_V;EHx4O>&a{xC0>OXJ5r(1-Z-Vy88l6P@&Gew@7VFRo`K`c+ySThf|F!= z?I13T^J}fsHrn?DQCKeRk3e|_ z`^2nhg_odS%TXy|qQ^tBDuhBW+$2xTyXjs^IgfXJ#TOA8ZBiy>IY;`D~@u7C1r@T~72;;w6^ z?mS9BbJ&+o_P8I*wq6a=Ka&XmGJmE2;zopHII+Vn1MkhW(SXahhh*%I_ubAVmZ;Ok z2ybXBRBTgkReUq>wAeITbeI>;GkCDa?!*?H(#>Zdqq9Sh|G$gQsq`(GGOE(E`_H8-y3jNuJ3wl#hhHrJIez>CM5QWk_a4GXC32QB< z)Ww_n>fdZ)BG%{EMcdT7(>8*wY#ZyfbxwSCoPZy2B_CW0Zwv*=N8;*dr|QIi{{CEiamD? z5?aZ273Qn-pwcMQM&S&5tK-S9trzeM`ayVTJj1v(Pux;yoPeT4xijUmR5tx5a8Ry+ zINuqhdO&jcC9c|d+Y*6qt@Of}GWcT)$x4rJ&n=qa(~t~~1JnL&T-QQ-_(R3hg|?Ar zTlac{C*R&{hJS1?0;t};A0m7xS?|v1+bdf)E}!3A+2tq+Hl17+_<}eQ2`ww1u;BgN zyBn_V6hM_uo@d#=n_AYe3I4b|O~XLY^XyeL`!fFbPB~}Z+wEWI+D5-+l5iy}Q~a1W zCP%WKybw0GNK86@S||C^N%t<#=IpbNe$}6Z$pPHwoKo@;y5Qm_s3zDTj*Lv+OV;B6F*9 z$GZcWZ?8BCz&Ye_(l~`qso)m}X*EOS{ElPL7*Sm=OCigGlOb?;`-%VK0*V2knu;Aa zBk#)hhOI<{I0$Z}D7%&G$-R;xov+KSZ9NM5-rHzo)ET;er+|4vs%nD$r&wVIkL#Uo z*@4lwt}X_26SQ1L<4JG1MI!JEK2QJX)ZJ{*A${%&x)aANV6+WEuaU={`ZuDd)B;}! z+239#q3zY2EubWLd`B>my52myd`dpEj_FV&UyQKk))n6apAII2f!Vz20nLC(y18SC zfKtDYbkf@ROrc44s^k-?qYSFMXqoA)x>q^x>8r9|_IGON3M8k@Rac!E>_hG{E?*A@7d!7%LzH1N9*rB+UKaf-V_y7hD9nj znB61e{Pk5DmKpp`^W(@x8G`7AsZ|64i5c^w8xLSq0ihQ3%fW;sYV=XGP?-#Gd}>L< zNNhLb7T`_(c9G}O&^bGa%kTSUgfYo^x+yW7*h+w(7#izAr7ih$1ZU&J%%-J$1rXayQKF0y*uNUXAt@Bm`A6ub-9(*D-36eka zz#4Ut06~8oEmsmUMO!J$Fr1m#dO!%@N6E7EvOD4nm|Ep9ZlW!1>{etm9%UN2zE?`u zIj>XF?E<rP zqAP-h&XF%p4+uB(BkltK6FAW)Y{n}kcPGJ7O2sM z(kd$RWHh5#a@zr&qjyReT_mF3+bOx(awCnX^~uudE2?A_g;RaQZjvQuSh&=8l(usF z#jBz?s6>1N8wz#vK*M;KM7S6+e?| zR&@!#*!HV~tIv=M7X{w_*4kXL_*M#EqsYizSf|ns@AR^#epVoJ9wro%@#5^kE;}Yc zpT0Y?^fFBU!PhCq5Qb1yutLz883S(MW@jiz3SkOGj|_%_jm<=LmZWfxg@gXY`Kzv4C1)&%^p%nGM^#M~ViE zAr3N(nI$Yo;kNY$dK}|1k@s6arfoZWm%NYT)W8i6%My z-Gwu~l!zj&1r60Ig%R1?kHsRHj)w^|)k;vohgTVS{jUXX7Vw)S`!e~&7(>h{c7(53 z#Au#q=G>Dx&5O?c?MlH1v=!tat~Og*Qy7!ot^-BSsjLAMDtPtLj_4qZ3;a z(&4qglPesgHrx|o7|R*MIuCp(9MzV+}P<-6UXGbB5D8bar_IBYi5XhZtN zjoRl|TqrT~hfM|}e!v7l97V!9Z5vP(Jv*7`>t|J9P^WQ;qpqYaG|4QF-)}0Q>7yVt z!C&)Q&gR!^?swva@5D7&TLyGIeDTSw!bZ4tzci%fa#8%U)nGybUp1pLP0me7Xb4Ku zwC@RlYOCjN0l0}0=SA5EAoctUrIB25vIV@71vX`tuN3z!>1JbknLqh6StH2T9d#Pl$?kGr52pMyT_RaVIpDwiCZSuOFN+Ua?yyJVxa(^n%VZrq$22lvVw zL)HxWtSkOTgk+YG^jH-RQa>Y~WG>}Y#wl{imSA34J^b1FLSh_ZA+e)E!=J9`-|aPZ zKeN`g9>o|L?NG+ht*&c@?rS6@q#I}l&Y5j%(1-Eu)~cLZX!fwo)w7(3soLzRzFrU* zvRLHW_iX6m6rhdesP<>JC$l%#;Nbq^EcxT>luPkknGr&hf9cu$*mL+GDy;iHN^o1J z6Sqbp&8hfTq)<-aqlv-(rrBS{a7kHpn_%p_A)KODY1aR1u2ix4ag2)tdac;PUws~5U8 zv0*UB*K-1+weus(1HLVbd}#jca%1v6q5Ss&s9#oO$xX`e??Xf+ z#Q*(ID&07=r*2qhCs@B!*85r$2q>Q}Qz7JM7j+u0l1)$2D$(lPOQ_XH7_J|R)h)N) z9r{hDeNktM)BxKA%Iyg)_uQC2eg3qoW&K&mF(vcWtLDM3hd5_vmS|7A-UQll;S>q@ zyBd{U@(aOlCmIupcNn+j58lsf@SG{L|6=a!AFRkx|E&`&X;NFgcam~zrfWCNn+pAF z|7-c;(-vKS4%Y18`vczA_b!>|E@q&ionCAouXNqtri&hXuFk(VjJ~?3!Q(v6c*GXz z$QsAqc1Q6Qbfm|&m25FoWtT8`X+5zG{3u4wrpL^cActU)SUxpqQO~c0kPf)_rJBm| z;vBqbnzlk;thyeVxA~~3L5(xU3lVSUg;$`ND~#>h-%1wk+qX^lC@T|Vs$rv}jMaVXB$NNfKaR(FiV_?^=PKsMb)kr0V zpfp>hIdR!toO@80!K7>V+!kedu0Mi{ADJ+EGYJ|`dB-b)oSsvf4}a!D&7#JYi=d?@J9*#oaa%CcK+_OtLVahn&fCS*+NJNwJLoBwqnyWe-A4#Mcye0I8Bz6 zm!fgnqE-w_O0bMb;7I3e(VtS)i?TdxHas^hpXW}!P`E7E{F!Dl>%S(@_(N~ZVp!kN zS|I*6@$7=4p`eL-$$daefNP}LW5-FWYiX|AE67zLTtL%UU2a}ajV^`m#k}+LxZ4DL zx*~~NM&CFuv}pyvo|cX!?^#qjLF*|3IyniRx6EI@)KpF3ADeysG1lNwrVAHEB`v{N z{H=}|XU*1RK0$rl{)dD#7Do#l%SPhkWh==Fzq)ei&*2p}xR$tU!oZqxa<}SCjyvX` z^A*-ZX;!Z!$t{=VGB?bJ&6G~vG?ibFDUt|Qmc+$Dk{m}(WkJU2U3Ip6f;YfJCjFS7 z{F1~0oL}N(yVj&sUEmMiFr?=xJUNNADwk|Y_PL_dVEol}{sJxU5V-s~zB4 zNt@!!=;8lkhhG0XdaoDxTz%l6Qq+aPhGe8AbM=VLscU?!Z{g*tGjZS-tCEiQ;{rqM z6wVTJ&*T+nic$X5BzZK?QiNQ3NW4k~;c{KVI5|Q;&c_d)(>BCFY96sE=EbR!at=mz zi5^jWfh+rS7j=E}rn$qlZhoH6O-7oC+uW8Go5yuxlrL6ToStzzd5n8LP)*C$PsfxF zV$1Psaa3ZdBfrdZE{Se(oOih0`L6Xdv1bKSvo@1?@iCiiJ8!2#raMc&P43;j`tjy5 zWmh?o?!GeX)>zwIAmjhAslfyJm!7VN1f79j6)JfjS|852 z%t&XcrZ9;HKEBSy+R~^tiBV=6JTWOuzK`7(ad4YzyT?uYm$o0I3-stw1}7TK{;pJ=eW8-WmgAP)Z?!)5lL;y7h!R@)aJzj z|Lk#8=CLewAEP=h9WYd8ARGS9hPmFY%zLe#8S?b}=))-1;zBD};)`^1o_E|YW3*Rd zU#Q1TnM*mJ;VYXu@Xe{8#~-Jbao#d$=GlE)_S>86*2%UDAIOvTe(lj!zO#2pzdi4> z`6I^2CLFH54#G0kbHK@-N+&|rKgG|)5UuyjhT-AtmB&=uf44B`BB*~Cy9N8EZ@<-@ zOJ_&K^|mbbW4m*e)g9AeuJCH+Y!kJ64Hxz)-y(PKCjcR%<$l_8BcK3^G|{{e3YXU$ z2%)6j4HJ*{^CeK@IYCu8Go`?c8;!;82WK7`2X1wazRbm#T!C+z)+bM-5?RPsPhSCd zxug?Wc?<|E<0WieKBT5p#ugBN_1&wAa8^ItEhWl-GN*eNr*NA!Oj)fYndd!;RlQ1g zZ$Zx0Dh_zCvTc<=C))DV{3ub0CiqCTYI8TsJ@^)~Fv7mFE?K!x{CfhiuGZ-`GSGCT z>OtY>UB(I`ACkrvm#8O#tv{#_J{3h$T-1iWNNS*q>Kd!#g7^b0&Gi(!rrt?ED1Hcg z-yM@)s+{a&{cLEy=sscX-fQJyu=a_mYG?~L$oi#f#kbY_#R2+tAzfgv#WY_ltjDU}G5Yr@o+kjbv=5wsW-t_wBO_w}O>i7uX~ zJz=Fte_~@|DrQAx!Vqb(Ol2yY25N~akZ!m3o)lxNelo!PW7xl1ukwwZ3vUip_|#>n zNyL2U@gdSL<{a_XxZCHPFAUaS+3{kW6C5(UwG?ux-1J_vMo3w6#qT5RHrJ%RYb4Ut z=d!K|M`24wo%UV9C9n9^eyfaye>5zvzu*Pn!>Rir7xa9tyr>}odPP4}oOPDnH_+U` z?@02tkb@}u%tBAL{Y$~iSK{&fh7D%?UpF(QXl_bx-mo>M9-xbisesB18bfcHlH8M& zP^_79g=#YgbDIqh7Z8fi$UDz&}z%TiH%=Wp_Q;^LUDLW(l zPoxKOSd#j%K$10L*jL6lA;IYGuNmbZ)eBF#$eiAMnFbM34Gcr!*F{#-2IU=v!;)Y8 zDK0tVOd6;p*>}M4hK_Mfr}sG3#M9!)jkz$(buY^iS!D~nu}rxYgHAgDm(@(@!!402 z2&tv+_y-ole5w`u*RU>s;xo3*vgCyw>Vnl>YoVTu=&_%lAg*Ugod%1#`tC6jM)pM> zATiu9=+Q%%*7{8wlQGIvGKUy@5O+aLfl4W53cENs+~`B;qkU?{(LxpctOfl_b3(T_ zhc2sL1sXpl?^r$}zj@+v!Sc|SRyhVvMKgOY2u__YCq=j>El|BsVNg#FJ1~%Ipsx+$ z^HP44bkJ!4?RWZo>>_H#`8g`8Pe6d*z*U0Z|5lK|?tp-!j>803*7iWgbDVrkF>8l{ zq}H?~{IPBaIH_9mXzgk1}mJePY$?*NXPiS>8rB>jl&E{ ztXbZSn`o}IX-KnI$-Z05|D+kEABi%6d=4-XXvk5Fn9#JfG0tDIM3r$f=Rcy6 z;PsW8O<%V_+~DD>Wk#xpb2BN2C1&+o%%W8oO(?_5d8|IXQ1f0GALYEU|2%;>2wX8= zB29*TYP3Fsc&)Z$3zA)?NZSEcDp?$G_Y^TdG_{Hyn)Eqz?1d?0xPNz@sJ-Kq%oO@= zW&X5MI8J@B@G+I5R^<}>RbfVeTHU>I8%Lk0`~pqp0G0WiT(`M1`-9WOtjG4%WF$yt zMTlpHd1g|&Bw1(i>Md9IpY^c#&7o!c-k6D=)`1N$SJ@5P=oSSO+l(gMwnYJRb_E&jG|Z$=obtc%RA)s8*_ zs*fgqORX8CuEez~^K(`2rdW;#X$_KbODZuboyNI>p)p>7R-3VBB%j6LuJAa1IA}Xj z%XD~f<)Iq$#^41#y6%bsIax_=>R@pj{&PfjicX#Q`5nYad1YKLG0ClC%=ok5gkclv zWTh;_T6D#ny^FCmZ*P|&dUtEKP|fvr|1%y}xXR&xLWaAG>|=AbiWgGj8q;<~?tR|V z*}h!Vy5Ub^AVJXaiplDo|PA>NYP=DmkhSc#GU-eCB-%Une3-SdRc7(5p!Fq z#9EPWeOJGZAK!TXo=E@uAnWgw1bqe7|AYwnC$s;P+5gGx|77-mGW$Q7{r`Z>{y$QB z`A5I~qu>6~Z~y4GfArfw`tARp`tASO)Ags+^iQejpHkC5rKW#MP5;N0n*Q&m-alPp zf4ao}bcy}x68qC7_TStk_WRhx4ermW0afR8fqOc02HX2{+WY%EBOE-Poj8Nx0dD`m zRJ^_X1N;IV1K{3XoW{-poGyOe2ssK;3Q~xtqnk4#lv5Y(P%UATWFG^ib1Xs6^I^06>@z61?fMSIC>)x z&RzkW4xZkQ9@hjsJvqI+132w{e4Oq5IN@IZpy|))=;v%7@Gpk;oPN%(aQ^^jzw0wW z&VK%XvGjKNzcL4||CHj|S`~dgF9$IZQCw1hk^;a5@UClU{&`TCsp+Wb>#B)~DC-*h z6D0T-2K);Y|2IhVH}KBCL6X0L68{Ft{sv0^8>ILfDD`jf#^1pIawYZO!2fbj+P{JS Og)RNxz<=5a$p0@AL|~r) literal 0 HcmV?d00001 diff --git a/requirements.txt b/requirements.txt index dc6f022..876dcfe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ prozorro-metrics~=1.4.0 prozorro-auth uvloop==0.15.2 python-dotenv==0.15.0 +python-magic==0.4.24 diff --git a/src/prozorro_sale/document_service/errors.py b/src/prozorro_sale/document_service/errors.py index 990848e..f6c2abf 100644 --- a/src/prozorro_sale/document_service/errors.py +++ b/src/prozorro_sale/document_service/errors.py @@ -36,3 +36,7 @@ class FileNotFound(KeyError): class HeaderNotExists(KeyError): pass + + +class InvalidFileType(Exception): + pass diff --git a/src/prozorro_sale/document_service/settings.py b/src/prozorro_sale/document_service/settings.py new file mode 100644 index 0000000..dad4d0a --- /dev/null +++ b/src/prozorro_sale/document_service/settings.py @@ -0,0 +1,13 @@ +import os + +DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f' +BUCKET_NAME = os.environ.get('BUCKET_NAME', '') +MAGIC_BUFFER_RECOMMENDED_SIZE = 2048 + +FORBIDDEN_FILETYPES = [ + 'text/javascript', + 'application/javascript', + 'application/x-javascript', + 'application/ecmascript', + 'text/ecmascript' +] diff --git a/src/prozorro_sale/document_service/storages/base_storage.py b/src/prozorro_sale/document_service/storages/base_storage.py index 4c048af..ba648e2 100644 --- a/src/prozorro_sale/document_service/storages/base_storage.py +++ b/src/prozorro_sale/document_service/storages/base_storage.py @@ -1,7 +1,12 @@ import os - -DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f' -BUCKET_NAME = os.environ.get('BUCKET_NAME', '') +from magic import from_buffer as get_type_from_buffer +from prozorro_sale.document_service.errors import InvalidFileType +from sys import getsizeof +from prozorro_sale.document_service.settings import ( + BUCKET_NAME, + MAGIC_BUFFER_RECOMMENDED_SIZE, + FORBIDDEN_FILETYPES, +) class BaseStorage: @@ -19,6 +24,16 @@ class BaseStorage: async def get_metadata(self, uuid, scope): raise NotImplementedError("Get_metadata method must be implemented in subclass") + @staticmethod + def validate_file_type(file_type): + if file_type in FORBIDDEN_FILETYPES: + raise InvalidFileType(file_type) + + def validate_file_data_mime_type(self, file_data): + if getsizeof(bytes(file_data)) >= MAGIC_BUFFER_RECOMMENDED_SIZE: + file_mime_type = get_type_from_buffer(bytes(file_data), mime=True) + self.validate_file_type(file_type=file_mime_type) + async def create_bucket_instance(app): storage_name = os.environ.get('STORAGE_NAME', 'memory') diff --git a/src/prozorro_sale/document_service/storages/memory_storage.py b/src/prozorro_sale/document_service/storages/memory_storage.py index 79e8c3b..27d29ed 100644 --- a/src/prozorro_sale/document_service/storages/memory_storage.py +++ b/src/prozorro_sale/document_service/storages/memory_storage.py @@ -4,8 +4,9 @@ from datetime import datetime from uuid import uuid4 from aiohttp import web -from prozorro_sale.document_service.storages.base_storage import BaseStorage, DATETIME_FORMAT +from prozorro_sale.document_service.storages.base_storage import BaseStorage from prozorro_sale.document_service.errors import FileNotFound, KeyNotFound, HeaderNotExists +from prozorro_sale.document_service.settings import DATETIME_FORMAT class MemoryStorage(BaseStorage): @@ -23,9 +24,11 @@ class MemoryStorage(BaseStorage): if field.name in data: data[field.name] = await field.text() if field.name == 'file': + self.validate_file_type(file_type=field.headers['Content-Type']) try: file_name = cgi.parse_header(field.headers['Content-Disposition'])[1]['filename'] file_data = await field.read() + self.validate_file_data_mime_type(file_data) data.update({ 'Content-Type': field.headers['Content-Type'], 'Content-Disposition': field.headers['Content-Disposition'], diff --git a/src/prozorro_sale/document_service/storages/s3_storage.py b/src/prozorro_sale/document_service/storages/s3_storage.py index 1e3e76d..9696ef0 100644 --- a/src/prozorro_sale/document_service/storages/s3_storage.py +++ b/src/prozorro_sale/document_service/storages/s3_storage.py @@ -7,9 +7,10 @@ import os from aiohttp import web from botocore.exceptions import ClientError - -from prozorro_sale.document_service.storages.base_storage import BaseStorage, DATETIME_FORMAT +from prozorro_sale.document_service.storages.base_storage import BaseStorage from prozorro_sale.document_service.errors import FileNotFound, KeyNotFound, HeaderNotExists +from prozorro_sale.document_service.settings import DATETIME_FORMAT + def get_client(): @@ -34,9 +35,11 @@ class S3Storage(BaseStorage): if field.name in data: data[field.name] = await field.text() if field.name == 'file': + self.validate_file_type(file_type=field.headers['Content-Type']) try: file_name = cgi.parse_header(field.headers['Content-Disposition'])[1]['filename'] file_data = await field.read() + self.validate_file_data_mime_type(file_data) data.update({ 'Content-Type': field.headers['Content-Type'], 'Content-Disposition': field.headers['Content-Disposition'], diff --git a/src/prozorro_sale/document_service/storages/swift_storage.py b/src/prozorro_sale/document_service/storages/swift_storage.py index 8a1360f..58f2d0f 100644 --- a/src/prozorro_sale/document_service/storages/swift_storage.py +++ b/src/prozorro_sale/document_service/storages/swift_storage.py @@ -12,8 +12,9 @@ from aiohttp import web from prozorro_sale.document_service.errors import (RetryRequestException, FileNotFound, KeyNotFound, HeaderNotExists, StorageException) -from prozorro_sale.document_service.storages.base_storage import BaseStorage, DATETIME_FORMAT +from prozorro_sale.document_service.storages.base_storage import BaseStorage from prozorro_sale.document_service.utils import critical +from prozorro_sale.document_service.settings import DATETIME_FORMAT ERRORS = { 404: KeyNotFound, @@ -127,12 +128,17 @@ class SwiftStorage(BaseStorage): if field.name in data: data[field.name] = await field.text() if field.name == 'file': + self.validate_file_type(file_type=field.headers['Content-Type']) file_name = cgi.parse_header(field.headers['Content-Disposition'])[1]['filename'] async def read_data(_field) -> bytes: + is_first_chunk = True while file_data := await _field.read_chunk(BUFF_SIZE): sha_hash.update(file_data) md5_hash.update(file_data) + if is_first_chunk: + self.validate_file_data_mime_type(file_data) + is_first_chunk = False yield file_data headers = { diff --git a/src/prozorro_sale/document_service/utils.py b/src/prozorro_sale/document_service/utils.py index 3d571c0..7815aba 100644 --- a/src/prozorro_sale/document_service/utils.py +++ b/src/prozorro_sale/document_service/utils.py @@ -55,6 +55,12 @@ async def excepts_errors_middleware(request, handler): data={'error': 'Remote storage broken. Please try again later.'}, status=424 ) + except errors.InvalidFileType as ex: + LOG.warning(f'Got file with invalid type: {ex}', stack_info=False) + return web.json_response( + data={'error': f'Invalid file type has been provided: {ex}'}, + status=406 + ) def critical(func): diff --git a/test-requirements.txt b/test-requirements.txt index 4b474b6..603987f 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,4 +1,5 @@ coverage nose aiounittest -moto==2.2.8 \ No newline at end of file +moto==2.2.8 +parameterized diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py index 0e27181..21a1117 100644 --- a/tests/integration/test_api.py +++ b/tests/integration/test_api.py @@ -1,10 +1,13 @@ import os import aiohttp +import json from aiohttp import test_utils from prozorro_sale.document_service import sign from prozorro_sale.document_service.api import create_app +from unittest import mock +from parameterized import parameterized class BaseAPITest(test_utils.AioHTTPTestCase): @@ -21,12 +24,27 @@ class APITest(BaseAPITest): mpwriter.append( obj=b'bar', headers={ - 'Content-Type': 'plain/text', + 'Content-Type': 'text/plain', 'Content-Disposition': 'form-data; name="file"; filename="foo.txt"' } ) return mpwriter + @staticmethod + def get_custom_multipart_obj(obj_data=b'abc', **headers): + with aiohttp.MultipartWriter('form-data') as mpwriter: + mpwriter.append( + obj=obj_data, + headers=headers + ) + return mpwriter + + @staticmethod + def read_fixture_content(filename): + with open(f"/fixtures/{filename}", 'rb') as f: + file_content = f.read() + return file_content + @test_utils.unittest_run_loop async def test_ping(self): response = await self.client.get('/api/ping') @@ -80,7 +98,7 @@ class APITest(BaseAPITest): response = await self.client.get(f'/api/documents/public/{doc_id}') data = await response.read() self.assertEqual(response.status, 200) - self.assertEqual(response.content_type, 'plain/text') + self.assertEqual(response.content_type, 'text/plain') self.assertEqual(data, b'bar') @test_utils.unittest_run_loop @@ -99,7 +117,7 @@ class APITest(BaseAPITest): response = await self.client.get(f'/api/documents/private/{doc_id}?token={token}') data = await response.read() self.assertEqual(response.status, 200) - self.assertEqual(response.content_type, 'plain/text') + self.assertEqual(response.content_type, 'text/plain') self.assertEqual(data, b'bar') @test_utils.unittest_run_loop @@ -153,3 +171,54 @@ class APITest(BaseAPITest): data = await response.read() self.assertEqual(response.status, 200) self.assertEqual(data.decode('utf-8'), sign.get_public_key()) + + @test_utils.unittest_run_loop + async def test_default_content_type_uploaded_document_public(self): + headers = { + 'Content-Disposition': 'form-data; name="file"; filename="foo.txt"' + } + response = await self.client.put( + '/api/documents/public', + data=self.get_custom_multipart_obj(obj_data=b'abc', **headers), + headers={"Authorization": "auction_token"} + ) + data = await response.read() + doc_id = sign._decode_token(data)['id'] + response = await self.client.get(f'/api/documents/public/{doc_id}') + data = await response.read() + self.assertEqual(response.status, 200) + self.assertEqual(response.content_type, 'application/octet-stream') + self.assertEqual(data, b'abc') + + @parameterized.expand([ + ('image/jpeg', ['text/javascript', 'application/javascript'], None, 200), + ('application/javascript', ['text/javascript', 'application/javascript'], None, 406), + ('application/pdf', ['text/javascript', 'application/javascript'], '1.pdf', 200), + ('image/gif', ['text/javascript', 'image/gif'], '2.gif', 406) + ]) + @test_utils.unittest_run_loop + async def test_valid_content_type_uploaded_document_public( + self, content_type, forbidden_filetypes, object_file_name, response_status): + headers = { + 'Content-Type': content_type, + 'Content-Disposition': 'form-data; name="file"; filename="foo.txt"' + } + file_content = b'abc' + if object_file_name: + file_content = self.read_fixture_content(object_file_name) + + with mock.patch('prozorro_sale.document_service.storages.base_storage.FORBIDDEN_FILETYPES', forbidden_filetypes): + response = await self.client.put( + '/api/documents/public', + data=self.get_custom_multipart_obj(obj_data=file_content, **headers), + headers={"Authorization": "auction_token"} + ) + data = await response.read() + + if response_status == 406: + self.assertEqual(json.loads(data)['error'], f'Invalid file type has been provided: {content_type}') + else: + doc_id = sign._decode_token(data)['id'] + response = await self.client.get(f'/api/documents/public/{doc_id}') + self.assertEqual(response.status, response_status) + self.assertEqual(response.content_type, content_type) -- GitLab From 1039c7773f0d6063fb0d831c92a92ba287ac3423 Mon Sep 17 00:00:00 2001 From: Volodymyr Kovalenko Date: Fri, 15 Oct 2021 11:22:44 +0300 Subject: [PATCH 3/3] provide file upload by chunks, switch s3 storage to aioboto3 --- requirements.txt | 4 +- .../document_service/settings.py | 18 ++- .../document_service/storages/base_storage.py | 26 ++++ .../storages/memory_storage.py | 18 +-- .../document_service/storages/s3_storage.py | 123 ++++++++++------- .../storages/swift_storage.py | 34 +++-- tests/integration/test_api.py | 5 +- tests/unit/helpers.py | 6 +- tests/unit/test_base_storage.py | 53 ++++++++ tests/unit/test_s3_storage.py | 125 ++++++++---------- tests/unit/test_storage.py | 4 +- 11 files changed, 253 insertions(+), 163 deletions(-) create mode 100644 tests/unit/test_base_storage.py diff --git a/requirements.txt b/requirements.txt index 876dcfe..d5fd391 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ -aiobotocore==1.4.2 pyjwt~=2.0.0 cryptography==3.4.4 -aiohttp==3.7.3 +aiohttp==3.7.4 aiohttp-swagger==1.0.15 prozorro-tools==0.11.0 prozorro-metrics~=1.4.0 @@ -9,3 +8,4 @@ prozorro-auth uvloop==0.15.2 python-dotenv==0.15.0 python-magic==0.4.24 +aioboto3==9.2.2 \ No newline at end of file diff --git a/src/prozorro_sale/document_service/settings.py b/src/prozorro_sale/document_service/settings.py index dad4d0a..0e2c545 100644 --- a/src/prozorro_sale/document_service/settings.py +++ b/src/prozorro_sale/document_service/settings.py @@ -1,13 +1,17 @@ import os +import json DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f' BUCKET_NAME = os.environ.get('BUCKET_NAME', '') MAGIC_BUFFER_RECOMMENDED_SIZE = 2048 -FORBIDDEN_FILETYPES = [ - 'text/javascript', - 'application/javascript', - 'application/x-javascript', - 'application/ecmascript', - 'text/ecmascript' -] +# 2 ** 16 - default linux socket buffer size +READ_BUFFER_SIZE = 2 ** 16 * 20 +S3_UPLOAD_FILE_MAX_THREADS = os.environ.get('S3_UPLOAD_FILE_MAX_THREADS', 10) + +FORBIDDEN_FILETYPES = json.loads(os.environ.get( + 'FORBIDDEN_FILETYPES', + '["text/javascript", "text/cmd", "text/css", "text/php", "text/markdown", "application/javascript", ' + '"application/x-javascript", "application/ecmascript", "text/ecmascript", "text/x-jquery-tmpl",' + '"application/vnd.microsoft.portable-executable"]' +)) diff --git a/src/prozorro_sale/document_service/storages/base_storage.py b/src/prozorro_sale/document_service/storages/base_storage.py index ba648e2..6d8d317 100644 --- a/src/prozorro_sale/document_service/storages/base_storage.py +++ b/src/prozorro_sale/document_service/storages/base_storage.py @@ -1,4 +1,6 @@ import os +import hashlib +from uuid import uuid4 from magic import from_buffer as get_type_from_buffer from prozorro_sale.document_service.errors import InvalidFileType from sys import getsizeof @@ -24,6 +26,30 @@ class BaseStorage: async def get_metadata(self, uuid, scope): raise NotImplementedError("Get_metadata method must be implemented in subclass") + @staticmethod + def setup_upload_data(scope: str, doc_type: str) -> tuple: + uuid = uuid4().hex + sha_hash = hashlib.sha256() + md5_hash = hashlib.md5() + data = {'scope': scope, 'documentType': doc_type} + file_name = None + return uuid, sha_hash, md5_hash, data, file_name + + async def patch_read_by_chunk(self, read_by_chunk_func, sha_hash, md5_hash): + is_first_chunk = True + + async def wrapped(*args, **kwargs): + chunk = await read_by_chunk_func(*args, **kwargs) + sha_hash.update(chunk) + md5_hash.update(chunk) + + nonlocal is_first_chunk + if is_first_chunk: + self.validate_file_data_mime_type(chunk) + is_first_chunk = False + return chunk + return wrapped + @staticmethod def validate_file_type(file_type): if file_type in FORBIDDEN_FILETYPES: diff --git a/src/prozorro_sale/document_service/storages/memory_storage.py b/src/prozorro_sale/document_service/storages/memory_storage.py index 27d29ed..4f23ed4 100644 --- a/src/prozorro_sale/document_service/storages/memory_storage.py +++ b/src/prozorro_sale/document_service/storages/memory_storage.py @@ -1,7 +1,5 @@ -import hashlib import cgi from datetime import datetime -from uuid import uuid4 from aiohttp import web from prozorro_sale.document_service.storages.base_storage import BaseStorage @@ -17,9 +15,7 @@ class MemoryStorage(BaseStorage): self.storage = {} async def upload(self, post_file, scope, doc_type): - uuid = uuid4().hex - data = {'scope': scope, 'documentType': doc_type} - file_name = None + uuid, sha_hash, md5_hash, data, file_name = self.setup_upload_data(scope, doc_type) async for field in post_file: if field.name in data: data[field.name] = await field.text() @@ -27,14 +23,18 @@ class MemoryStorage(BaseStorage): self.validate_file_type(file_type=field.headers['Content-Type']) try: file_name = cgi.parse_header(field.headers['Content-Disposition'])[1]['filename'] - file_data = await field.read() - self.validate_file_data_mime_type(file_data) + + field.read_chunk = await self.patch_read_by_chunk( + read_by_chunk_func=field.read_chunk, sha_hash=sha_hash, md5_hash=md5_hash + ) + file_data = await field.read() # read_chunk method is calling inside + data.update({ 'Content-Type': field.headers['Content-Type'], 'Content-Disposition': field.headers['Content-Disposition'], 'body': file_data, - 'sha': hashlib.sha256(file_data).hexdigest(), - 'hash': 'md5:' + hashlib.md5(file_data).hexdigest(), + 'sha': sha_hash.hexdigest(), + 'hash': 'md5:' + md5_hash.hexdigest(), 'dateCreated': datetime.now().strftime(DATETIME_FORMAT) }) except KeyError as ex: diff --git a/src/prozorro_sale/document_service/storages/s3_storage.py b/src/prozorro_sale/document_service/storages/s3_storage.py index 9696ef0..9128bc3 100644 --- a/src/prozorro_sale/document_service/storages/s3_storage.py +++ b/src/prozorro_sale/document_service/storages/s3_storage.py @@ -1,26 +1,25 @@ from datetime import datetime -from uuid import uuid4 -import hashlib import cgi -import aiobotocore import os +import aioboto3 from aiohttp import web +from boto3.s3.transfer import TransferConfig from botocore.exceptions import ClientError from prozorro_sale.document_service.storages.base_storage import BaseStorage -from prozorro_sale.document_service.errors import FileNotFound, KeyNotFound, HeaderNotExists -from prozorro_sale.document_service.settings import DATETIME_FORMAT +from prozorro_sale.document_service.errors import FileNotFound, KeyNotFound, HeaderNotExists, StorageException +from prozorro_sale.document_service.settings import DATETIME_FORMAT, READ_BUFFER_SIZE, S3_UPLOAD_FILE_MAX_THREADS - -def get_client(): - client = aiobotocore.get_session().create_client( +def get_session(): + session = aioboto3.Session() + s3 = session.client( 's3', region_name=os.environ['BUCKET_HOST'], aws_secret_access_key=os.environ['BUCKET_SECRET_KEY'], aws_access_key_id=os.environ['BUCKET_ACCESS_KEY'] ) - return client + return s3 class S3Storage(BaseStorage): @@ -28,9 +27,8 @@ class S3Storage(BaseStorage): storage_name = 's3' async def upload(self, post_file, scope, doc_type): - uuid = uuid4().hex - data = {'scope': scope, 'documentType': doc_type} - file_name = None + uuid, sha_hash, md5_hash, data, file_name = self.setup_upload_data(scope, doc_type) + _upload_key = f"{scope}/{uuid}" async for field in post_file: if field.name in data: data[field.name] = await field.text() @@ -38,68 +36,92 @@ class S3Storage(BaseStorage): self.validate_file_type(file_type=field.headers['Content-Type']) try: file_name = cgi.parse_header(field.headers['Content-Disposition'])[1]['filename'] - file_data = await field.read() - self.validate_file_data_mime_type(file_data) data.update({ 'Content-Type': field.headers['Content-Type'], 'Content-Disposition': field.headers['Content-Disposition'], - 'body': file_data, - 'sha': hashlib.sha256(file_data).hexdigest(), - 'hash': 'md5:' + hashlib.md5(file_data).hexdigest(), - 'dateCreated': datetime.now().strftime(DATETIME_FORMAT) + 'dateCreated': datetime.now().strftime(DATETIME_FORMAT), }) except KeyError as ex: raise HeaderNotExists(ex) - if 'body' not in data: + try: + field.read_chunk = await self.patch_read_by_chunk( + read_by_chunk_func=field.read_chunk, sha_hash=sha_hash, md5_hash=md5_hash + ) + + s3_upload_config = TransferConfig( + max_concurrency=S3_UPLOAD_FILE_MAX_THREADS + ) + async with get_session() as session: + await session.upload_fileobj( + Fileobj=field, + Bucket=self.bucket, + Key=_upload_key, + Config=s3_upload_config, + ExtraArgs=dict( + Metadata=data + ) + ) + + data.update({ + 'sha': sha_hash.hexdigest(), + 'hash': 'md5:' + md5_hash.hexdigest() + }) + + head_response = await session.head_object(Bucket=self.bucket, Key=_upload_key) + metadata = head_response["Metadata"] + metadata["sha"] = data["sha"] + metadata["hash"] = data["hash"] + + await session.copy_object( + Bucket=self.bucket, Key=_upload_key, CopySource=self.bucket + '/' + _upload_key, + Metadata=metadata, MetadataDirective='REPLACE' + ) + except Exception as e: + msg = f"Unable to s3 upload: {e}" + raise StorageException(msg) + + if 'sha' not in data: raise FileNotFound - content_type = data['Content-Type'] - async with get_client() as client: - await client.put_object( - Bucket=self.bucket, - Key=f"{scope}/{uuid}", - Body=data.pop('body'), - ContentDisposition=data.pop('Content-Disposition'), - ContentType=data.pop('Content-Type'), - Metadata=data - ) data['id'] = uuid return { 'id': uuid, 'scope': data['scope'], 'filename': file_name, 'documentType': data['documentType'], - 'format': content_type, + 'format': data['Content-Type'], 'sha': data['sha'], 'hash': data['hash'], 'dateCreated': data['dateCreated'] } async def get(self, uuid, scope, request): - try: - async with get_client() as client: - response = await client.get_object(Bucket=self.bucket, Key=f"{scope}/{uuid}") - except ClientError: - raise KeyNotFound(uuid) + async with get_session() as session: + try: + response = await session.get_object(Bucket=self.bucket, Key=f"{scope}/{uuid}") + except ClientError: + raise KeyNotFound(uuid) + + response_headers = response['ResponseMetadata']['HTTPHeaders'] + headers = { + 'Content-Type': response_headers['x-amz-meta-content-type'], + 'Content-Disposition': response_headers['x-amz-meta-content-disposition'], + } + stream_resp = web.StreamResponse(headers=headers) + await stream_resp.prepare(request) - async with response['Body'] as stream: - content = await stream.read() - response_headers = response['ResponseMetadata']['HTTPHeaders'] + async with response["Body"] as stream: + while chunk := await stream.read(READ_BUFFER_SIZE): + await stream_resp.write(chunk) - return web.Response( - body=content, - headers={ - 'Content-Disposition': response_headers['content-disposition'], - 'Content-Type': response_headers['content-type'] - }, - status=200 - ) + await stream_resp.write_eof() + return stream_resp async def get_metadata(self, uuid, scope): try: - async with get_client() as client: - response = await client.head_object(Bucket=self.bucket, Key=f"{scope}/{uuid}") + async with get_session() as session: + response = await session.head_object(Bucket=self.bucket, Key=f"{scope}/{uuid}") except ClientError: raise KeyNotFound(uuid) @@ -107,9 +129,10 @@ class S3Storage(BaseStorage): metadata = { 'X-Scope': metadata_response['scope'], 'X-Document-Type': metadata_response['documenttype'], - 'Content-Type': metadata_response['ContentType'], + 'Content-Type': metadata_response['content-type'], 'X-SHA': metadata_response['sha'], + 'ETag': metadata_response['hash'], 'X-Date-Created': metadata_response['datecreated'], - 'ETag': metadata_response['hash'] + 'S3-ETag': response['ETag'] } return web.Response(headers=metadata, status=200) diff --git a/src/prozorro_sale/document_service/storages/swift_storage.py b/src/prozorro_sale/document_service/storages/swift_storage.py index 58f2d0f..2d82010 100644 --- a/src/prozorro_sale/document_service/storages/swift_storage.py +++ b/src/prozorro_sale/document_service/storages/swift_storage.py @@ -1,20 +1,20 @@ import asyncio import cgi import datetime -import hashlib import logging import os -from uuid import uuid4 import aiohttp import prometheus_client from aiohttp import web +from aiohttp import BodyPartReader +from collections import AsyncIterable from prozorro_sale.document_service.errors import (RetryRequestException, FileNotFound, KeyNotFound, HeaderNotExists, StorageException) from prozorro_sale.document_service.storages.base_storage import BaseStorage from prozorro_sale.document_service.utils import critical -from prozorro_sale.document_service.settings import DATETIME_FORMAT +from prozorro_sale.document_service.settings import DATETIME_FORMAT, READ_BUFFER_SIZE ERRORS = { 404: KeyNotFound, @@ -22,8 +22,6 @@ ERRORS = { 422: ValueError, } -# 2 ** 16 - default linux socket buffer size -BUFF_SIZE = 2 ** 16 * 20 LOG = logging.getLogger('swift-storage') get_document_session_latency = prometheus_client.Summary( @@ -45,7 +43,7 @@ def client(): sock_connect=2 * 60, sock_read=10 * 60 ), - read_bufsize=BUFF_SIZE + read_bufsize=READ_BUFFER_SIZE ) @@ -115,13 +113,9 @@ class SwiftStorage(BaseStorage): raise RetryRequestException('Please try again later') async def upload(self, post_file, scope, doc_type): - uuid = uuid4().hex - sha_hash = hashlib.sha256() - md5_hash = hashlib.md5() - data = {'scope': scope, 'documentType': doc_type} + uuid, sha_hash, md5_hash, data, file_name = self.setup_upload_data(scope, doc_type) object_url = f"{self.url}/{self._container}/{scope}_{uuid}" censored_url = object_url.replace(os.environ['SWIFT_PROJECT_ID'], "") - file_name = None date_created = None headers = {} async for field in post_file: @@ -131,22 +125,24 @@ class SwiftStorage(BaseStorage): self.validate_file_type(file_type=field.headers['Content-Type']) file_name = cgi.parse_header(field.headers['Content-Disposition'])[1]['filename'] - async def read_data(_field) -> bytes: + async def read_data(_field: BodyPartReader) -> AsyncIterable: is_first_chunk = True - while file_data := await _field.read_chunk(BUFF_SIZE): - sha_hash.update(file_data) - md5_hash.update(file_data) + while chunk := await _field.read_chunk(READ_BUFFER_SIZE): + sha_hash.update(chunk) + md5_hash.update(chunk) if is_first_chunk: - self.validate_file_data_mime_type(file_data) + self.validate_file_data_mime_type(chunk) is_first_chunk = False - yield file_data + yield chunk headers = { 'X-Auth-Token': self.auth.get_token() } with put_document_session_latency.time(): async with client() as session: - response = await session.put(object_url, ssl=False, data=read_data(field), headers=headers) + response = await session.put( + object_url, ssl=False, data=read_data(field), headers=headers + ) if response.status == 401: await self.refresh_token() @@ -215,7 +211,7 @@ class SwiftStorage(BaseStorage): } stream = web.StreamResponse(headers=headers) await stream.prepare(request) - while chunk := await response.content.read(BUFF_SIZE): + while chunk := await response.content.read(READ_BUFFER_SIZE): await stream.write(chunk) await stream.write_eof() diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py index 21a1117..454e7fd 100644 --- a/tests/integration/test_api.py +++ b/tests/integration/test_api.py @@ -3,6 +3,7 @@ import os import aiohttp import json from aiohttp import test_utils +import hashlib from prozorro_sale.document_service import sign from prozorro_sale.document_service.api import create_app @@ -86,7 +87,9 @@ class APITest(BaseAPITest): self.assertIn('X-Document-Type', response.headers) self.assertIn('X-SHA', response.headers) self.assertIn('X-Date-Created', response.headers) - self.assertIn('ETag', response.headers) + expected_sha = hashlib.sha256() + expected_sha.update(b'bar') + self.assertEqual(response.headers['X-SHA'], expected_sha.hexdigest()) @test_utils.unittest_run_loop async def test_get_document_public(self): diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py index 1d1c826..d4912a4 100644 --- a/tests/unit/helpers.py +++ b/tests/unit/helpers.py @@ -1,5 +1,6 @@ +from io import BytesIO from unittest.mock import MagicMock - +from tests.unit.test_base_storage import Stream async def get_text(): return 'test' @@ -9,7 +10,7 @@ async def get_file_data(): return b'test_data' -def get_mocked_data(): +def get_mocked_data(storage_type=None): main_mock = MagicMock() mock_doc_type = MagicMock() mock_file = MagicMock() @@ -23,5 +24,6 @@ def get_mocked_data(): 'Content-Disposition': 'form-data; name="file"; filename="foo.txt"' } mock_file.read = get_file_data + main_mock.__aiter__.return_value = [mock_doc_type, mock_file] return main_mock diff --git a/tests/unit/test_base_storage.py b/tests/unit/test_base_storage.py new file mode 100644 index 0000000..7176dd8 --- /dev/null +++ b/tests/unit/test_base_storage.py @@ -0,0 +1,53 @@ +from aiounittest import AsyncTestCase +from prozorro_sale.document_service.storages.base_storage import BaseStorage +import aiohttp +import io +from unittest import mock +import hashlib + + +class Stream: + def __init__(self, content): + self.content = io.BytesIO(content) + + async def read(self, size=None): + return self.content.read(size) + + def at_eof(self): + return self.content.tell() == len(self.content.getbuffer()) + + async def readline(self): + return self.content.readline() + + def unread_data(self, data): + self.content = io.BytesIO(data + self.content.read()) + + +class BaseStorageTest(AsyncTestCase): + + def setUp(self) -> None: + self.base_storage = BaseStorage("") + + @staticmethod + def read_fixture_content(filename): + with open(f"/fixtures/{filename}", 'rb') as f: + file_content = f.read() + return file_content + + async def test_patch_read_by_chunk(self): + file_stream_data = aiohttp.BodyPartReader(b"--:", {}, Stream(b"Hello, world!\r\n--:")) + sha_hash = hashlib.sha256() + md5_hash = hashlib.md5() + with mock.patch( + 'prozorro_sale.document_service.storages.base_storage.BaseStorage.validate_file_data_mime_type', + return_value=None + ) as validate_file_type_mock: + file_stream_data.read_chunk = await self.base_storage.patch_read_by_chunk( + file_stream_data.read_chunk, sha_hash, md5_hash + ) + res = await file_stream_data.read() + validate_file_type_mock.assert_called_once_with(b"Hello, world!") + + self.assertEqual(res, b"Hello, world!") + expected_sha_hash = hashlib.sha256(b"Hello, world!") + self.assertEqual(sha_hash.hexdigest(), expected_sha_hash.hexdigest()) diff --git a/tests/unit/test_s3_storage.py b/tests/unit/test_s3_storage.py index f4313d4..f5f4be3 100644 --- a/tests/unit/test_s3_storage.py +++ b/tests/unit/test_s3_storage.py @@ -1,7 +1,7 @@ import os import boto3 import io -import aiobotocore +import aioboto3 from aiounittest import AsyncTestCase from prozorro_sale.document_service.storages.s3_storage import S3Storage @@ -10,8 +10,9 @@ from moto import mock_s3 from botocore import awsrequest from botocore.stub import Stubber from unittest.mock import patch -from aiobotocore.response import StreamingBody from tests.unit.helpers import get_mocked_data, get_file_data +from botocore.response import StreamingBody +from aiohttp.test_utils import make_mocked_request class MonkeyPatchedAWSResponse(awsrequest.AWSResponse): @@ -33,7 +34,7 @@ class RawStream(io.BytesIO): async def __aexit__(self, exc_type, exc_val, exc_tb): pass - async def read(self): + async def read(self, chunk_size): return super().read() @@ -54,83 +55,67 @@ class S3StorageTest(AsyncTestCase): os.environ['BUCKET_SECRET_KEY'] = 'testing' os.environ['BUCKET_ACCESS_KEY'] = 'testing' self.s3_bucket = self.create_bucket() - self.aiobotocore_session = aiobotocore.get_session() + self.aioboto3_session = aioboto3.Session() def tearDown(self): self.mock_s3.stop() - async def test_upload_object(self): - _file = get_mocked_data() - res = await self.s3_storage.upload(post_file=_file, scope='public', doc_type='') - self.assertIn('id', res) - self.assertEqual('public', res['scope']) - self.assertEqual('foo.txt', res['filename']) - self.assertEqual('test', res['documentType']) - self.assertEqual('plain/text', res['format']) - self.assertIn('sha', res) - self.assertIn('hash', res) - self.assertIn('dateCreated', res) - - with self.assertRaises(KeyNotFound): - await self.s3_storage.get(uuid='unknown_key', scope='public', request=None) - - actual_res = self.s3_bucket.Object(self.bucket_name, f"public/{res['id']}").get()['Body'].read() - expected_res = await get_file_data() - self.assertEqual(actual_res, expected_res) - async def test_get_object(self): uuid = 'a'*32 scope = 'public' + req = make_mocked_request('GET', '/api/documents/private/111', headers={'token': '123'}) body_content = b"s3 some test binary data" - async with self.aiobotocore_session.create_client('s3', region_name='us-east-1') as client: - with Stubber(client) as stubber: - stubber.add_response( - "get_object", - { - "Body": StreamingBody( - raw_stream=RawStream(body_content), content_length=128 - ), - "ContentLength": 128, - "ResponseMetadata": { - "HTTPHeaders": { - "content-disposition": "test-content-disposition", - "content-type": "test-content-type" - } - } - }, - expected_params={"Bucket": self.bucket_name, "Key": f"{scope}/{uuid}"} - ) - with patch('prozorro_sale.document_service.storages.s3_storage.get_client', - return_value=client) as mock_get_client: - response = await self.s3_storage.get(uuid=uuid, scope=scope, request=None) - self.assertEquals(response.status, 200) - self.assertEquals(response.body, body_content) - mock_get_client.assert_called_once() + self._s3_client = await self.aioboto3_session.client('s3').__aenter__() + self._s3_stubstubber = Stubber(self._s3_client) + self._s3_stubstubber.add_response( + "get_object", + { + "Body": RawStream(body_content), + "ContentLength": 128, + "ResponseMetadata": { + "HTTPHeaders": { + "x-amz-meta-content-type": "test-content-type", + "x-amz-meta-content-disposition": "test-content-disposition" + } + } + }, + expected_params={"Bucket": self.bucket_name, "Key": f"{scope}/{uuid}"} + ) + self._s3_stubstubber.activate() + with patch('prozorro_sale.document_service.storages.s3_storage.get_session', + return_value=self._s3_client) as mock_get_session: + response = await self.s3_storage.get(uuid=uuid, scope=scope, request=req) + self.assertEquals(response.status, 200) + self.assertEquals(response.headers["content-type"], "test-content-type") + mock_get_session.assert_called_once() async def test_head_object(self): uuid = 'b'*32 scope = 'public' - async with self.aiobotocore_session.create_client('s3', region_name='us-east-1') as client: - with Stubber(client) as stubber: - stubber.add_response( - "head_object", - { - "Metadata": { - "scope": "test_scope", - "documenttype": "test_documenttype", - "ContentType": "test_ContentType", - "sha": "test_sha", - "datecreated": "test_datecreated", - "hash": "test_hash" - } - }, - expected_params={"Bucket": self.bucket_name, "Key": f"{scope}/{uuid}"} - ) - with patch('prozorro_sale.document_service.storages.s3_storage.get_client', - return_value=client) as mock_get_client: - response = await self.s3_storage.get_metadata(uuid=uuid, scope=scope) - self.assertEquals(response.status, 200) - self.assertEquals(response.headers["ETag"], "test_hash") - self.assertEquals(response.headers["X-Date-Created"], "test_datecreated") - mock_get_client.assert_called_once() + self._s3_client = await self.aioboto3_session.client('s3').__aenter__() + self._s3_stubstubber = Stubber(self._s3_client) + self._s3_stubstubber.add_response( + "head_object", + { + "Metadata": { + "scope": "test_scope", + "documenttype": "test_documenttype", + "content-type": "test_ContentType", + "datecreated": "test_datecreated", + "sha": "test_sha256_hash", + "hash": "test_md5_hash" + }, + "ETag": "39bdcd1f8cf6aa293799f3291596f187-1" + }, + expected_params={"Bucket": self.bucket_name, "Key": f"{scope}/{uuid}"} + ) + self._s3_stubstubber.activate() + with patch('prozorro_sale.document_service.storages.s3_storage.get_session', + return_value=self._s3_client) as mock_get_session: + response = await self.s3_storage.get_metadata(uuid=uuid, scope=scope) + self.assertEquals(response.status, 200) + self.assertEquals(response.headers["S3-ETag"], "39bdcd1f8cf6aa293799f3291596f187-1") + self.assertEquals(response.headers["ETag"], "test_md5_hash") + self.assertEquals(response.headers["X-Date-Created"], "test_datecreated") + mock_get_session.assert_called_once() diff --git a/tests/unit/test_storage.py b/tests/unit/test_storage.py index 74b87c1..2cb2c33 100644 --- a/tests/unit/test_storage.py +++ b/tests/unit/test_storage.py @@ -7,7 +7,7 @@ from prozorro_sale.document_service.storages.memory_storage import MemoryStorage from tests.unit.helpers import get_mocked_data -class MemoryStorageTest(AsyncTestCase): +class StorageTest(AsyncTestCase): def setUp(self) -> None: self.storage_obj = MemoryStorage('') @@ -25,7 +25,6 @@ class MemoryStorageTest(AsyncTestCase): self.assertIn('documentType', data) self.assertIn('format', data) self.assertIn('sha', data) - self.assertIn('hash', data) self.assertIn('dateCreated', data) async def test_get_file(self): @@ -45,4 +44,3 @@ class MemoryStorageTest(AsyncTestCase): self.assertIn('X-Document-Type', data.headers) self.assertIn('X-SHA', data.headers) self.assertIn('X-Date-Created', data.headers) - self.assertIn('ETag', data.headers) -- GitLab