From 1fc433322294b508df067a05488c4ea706743722 Mon Sep 17 00:00:00 2001 From: shokakucarrier Date: Fri, 1 Sep 2023 21:07:24 +0800 Subject: [PATCH 1/3] use paginator in list item to avoid 1000 limit --- charon/storage.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/charon/storage.py b/charon/storage.py index e040b5a6..899008ca 100644 --- a/charon/storage.py +++ b/charon/storage.py @@ -698,32 +698,37 @@ def list_folder_content(self, bucket_name: str, folder: str) -> List[str]: not in its subfolders. """ bucket = self.__get_bucket(bucket_name) + try: if not folder or folder.strip() == "/" or folder.strip() == "": - result = bucket.meta.client.list_objects( + paginator = bucket.meta.client.get_paginator('list_objects_v2') + pages = paginator.paginate( Bucket=bucket.name, Delimiter='/' ) else: prefix = folder if folder.endswith("/") else folder+"/" - result = bucket.meta.client.list_objects( + paginator = bucket.meta.client.get_paginator('list_objects_v2') + pages = paginator.paginate( Bucket=bucket.name, Prefix=prefix, Delimiter='/' ) + except (ClientError, HTTPClientError) as e: logger.error("ERROR: Can not get contents of %s from bucket" " %s due to error: %s ", folder, bucket_name, e) return [] - + contents = [] - folders = result.get("CommonPrefixes") - if folders: - contents.extend([f.get("Prefix") for f in folders]) - files = result.get("Contents") - if files: - contents.extend([f.get("Key") for f in files]) + for page in pages: + folders = page.get("CommonPrefixes") + if folders: + contents.extend([f.get("Prefix") for f in folders]) + files = page.get("Contents") + if files: + contents.extend([f.get("Key") for f in files]) return contents def file_exists_in_bucket( From c9d82e2d39159bec8c593823a5edd712ec306fe2 Mon Sep 17 00:00:00 2001 From: shokakucarrier Date: Fri, 1 Sep 2023 21:14:29 +0800 Subject: [PATCH 2/3] fix linter issue --- charon/storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/charon/storage.py b/charon/storage.py index 899008ca..62912451 100644 --- a/charon/storage.py +++ b/charon/storage.py @@ -698,7 +698,7 @@ def list_folder_content(self, bucket_name: str, folder: str) -> List[str]: not in its subfolders. """ bucket = self.__get_bucket(bucket_name) - + try: if not folder or folder.strip() == "/" or folder.strip() == "": paginator = bucket.meta.client.get_paginator('list_objects_v2') @@ -720,7 +720,7 @@ def list_folder_content(self, bucket_name: str, folder: str) -> List[str]: " %s due to error: %s ", folder, bucket_name, e) return [] - + contents = [] for page in pages: folders = page.get("CommonPrefixes") From 4a6c01b076ca8e325084680f5e70e91b2822cd3f Mon Sep 17 00:00:00 2001 From: shokakucarrier Date: Fri, 1 Sep 2023 21:26:00 +0800 Subject: [PATCH 3/3] minor code improvement --- charon/storage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/charon/storage.py b/charon/storage.py index 62912451..45963c42 100644 --- a/charon/storage.py +++ b/charon/storage.py @@ -700,15 +700,14 @@ def list_folder_content(self, bucket_name: str, folder: str) -> List[str]: bucket = self.__get_bucket(bucket_name) try: + paginator = bucket.meta.client.get_paginator('list_objects_v2') if not folder or folder.strip() == "/" or folder.strip() == "": - paginator = bucket.meta.client.get_paginator('list_objects_v2') pages = paginator.paginate( Bucket=bucket.name, Delimiter='/' ) else: prefix = folder if folder.endswith("/") else folder+"/" - paginator = bucket.meta.client.get_paginator('list_objects_v2') pages = paginator.paginate( Bucket=bucket.name, Prefix=prefix,