OSS Python SDK提供了封装的迭代器来列举存储空间(Bucket)中的文件(Object),支持按前缀、目录、起始位置等条件筛选文件。
使用方式
OSS Python SDK提供两种列举文件的方法:ObjectIterator(基于GetBucket (ListObjects)接口)和ObjectIteratorV2(基于ListObjectsV2(GetBucketV2)接口)。两种方法的主要区别:
ObjectIterator:默认返回文件的owner信息。
ObjectIteratorV2:通过fetch_owner参数控制是否返回owner信息。使用ObjectIteratorV2需要Python SDK版本为2.12.0及以上。
推荐使用ObjectIteratorV2,对开启版本控制的Bucket提供更好的支持。
ObjectIteratorV2构造器声明
oss2.ObjectIteratorV2(bucket, prefix='', delimiter='', continuation_token='', start_after='', fetch_owner=False, encoding_type='url', max_keys=100, max_retries=None, headers=None)
ObjectIterator构造器声明
oss2.ObjectIterator(bucket, prefix='', delimiter='', marker='', max_keys=100, max_retries=None, headers=None)
代码示例
运行代码前请先安装Python SDK并完成访问凭证环境变量配置,具体操作参见Python SDK快速入门。阿里云主账号默认具有所有权限,如果使用RAM用户或RAM角色,需确保相应的用户或角色具有oss:ListObjects权限。
列举所有文件
ObjectIteratorV2(推荐)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
def main():
    """List every object in the bucket and print its key and size."""
    # Access credentials come from environment variables.
    credentials_provider = EnvironmentVariableCredentialsProvider()
    auth = oss2.ProviderAuthV4(credentials_provider)

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    # Walk all objects in the bucket.
    print("列举所有文件:")
    all_objects = oss2.ObjectIteratorV2(bucket)
    for obj in all_objects:
        print(f"文件名: {obj.key}, 大小: {obj.size} 字节")
if __name__ == "__main__":
    main()
如果需要获取文件的owner信息,需设置fetch_owner=True:
# List every object and request owner information via fetch_owner=True.
objects_with_owner = oss2.ObjectIteratorV2(bucket, fetch_owner=True)
for obj in objects_with_owner:
    owner = obj.owner
    print(f"文件名: {obj.key}")
    print(f"Owner名称: {owner.display_name}")
    print(f"Owner ID: {owner.id}")

ObjectIterator
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
def main():
    """List every object in the bucket with ObjectIterator and print key and size."""
    # Access credentials come from environment variables.
    credentials_provider = EnvironmentVariableCredentialsProvider()
    auth = oss2.ProviderAuthV4(credentials_provider)

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    # Walk all objects in the bucket.
    print("列举所有文件:")
    all_objects = oss2.ObjectIterator(bucket)
    for obj in all_objects:
        print(f"文件名: {obj.key}, 大小: {obj.size} 字节")
if __name__ == "__main__":
    main()
列举指定个数的文件
ObjectIteratorV2(推荐)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
from itertools import islice
def main():
    """Print the keys of the first 10 objects in the bucket."""
    # Access credentials come from environment variables.
    auth = oss2.ProviderAuthV4(EnvironmentVariableCredentialsProvider())

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    limit = 10
    print("列举前10个文件:")
    # Only `limit` entries are consumed, so ask for that many per request
    # instead of the default max_keys=100.
    listing = oss2.ObjectIteratorV2(bucket, max_keys=limit)
    for obj in islice(listing, limit):
        print(f"文件名: {obj.key}")
if __name__ == "__main__":
    main()
ObjectIterator
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
from itertools import islice
def main():
    """Print the keys of the first 10 objects in the bucket using ObjectIterator."""
    # Access credentials come from environment variables.
    auth = oss2.ProviderAuthV4(EnvironmentVariableCredentialsProvider())

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    limit = 10
    print("列举前10个文件:")
    # Only `limit` entries are consumed, so ask for that many per request
    # instead of the default max_keys=100.
    listing = oss2.ObjectIterator(bucket, max_keys=limit)
    for obj in islice(listing, limit):
        print(f"文件名: {obj.key}")
if __name__ == "__main__":
    main()
列举指定前缀的文件
假设Bucket中有4个文件:oss.jpg、fun/test.jpg、fun/movie/001.avi、fun/movie/007.avi,正斜线(/)作为文件夹的分隔符。下方预期输出中的fun/和fun/movie/是大小为0、以正斜线(/)结尾的目录对象(例如通过控制台创建文件夹时生成),仅当这些对象存在时才会被列举。
ObjectIteratorV2(推荐)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
def main():
    """Print every object whose key starts with the prefix fun/."""
    # Access credentials come from environment variables.
    credentials_provider = EnvironmentVariableCredentialsProvider()
    auth = oss2.ProviderAuthV4(credentials_provider)

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    # Everything under the fun folder, including files in its subfolders.
    print("列举前缀为fun/的所有文件:")
    prefixed_objects = oss2.ObjectIteratorV2(bucket, prefix='fun/')
    for obj in prefixed_objects:
        print(f"文件名: {obj.key}")
if __name__ == "__main__":
    main()
预期输出:
列举前缀为fun/的所有文件:
文件名: fun/
文件名: fun/movie/
文件名: fun/movie/001.avi
文件名: fun/movie/007.avi
文件名: fun/test.jpg
ObjectIterator
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
def main():
    """Print every object whose key starts with the prefix fun/ using ObjectIterator."""
    # Access credentials come from environment variables.
    credentials_provider = EnvironmentVariableCredentialsProvider()
    auth = oss2.ProviderAuthV4(credentials_provider)

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    # Everything under the fun folder, including files in its subfolders.
    print("列举前缀为fun/的所有文件:")
    prefixed_objects = oss2.ObjectIterator(bucket, prefix='fun/')
    for obj in prefixed_objects:
        print(f"文件名: {obj.key}")
if __name__ == "__main__":
    main()
预期输出:
列举前缀为fun/的所有文件:
文件名: fun/
文件名: fun/movie/
文件名: fun/movie/001.avi
文件名: fun/movie/007.avi
文件名: fun/test.jpg
列举指定目录下的文件和子目录
OSS没有文件夹的概念,所有元素都是以文件来存储。创建文件夹本质上是创建了一个大小为0并以正斜线(/)结尾的文件。控制台会将以正斜线(/)结尾的文件以文件夹的方式展示。
通过delimiter和prefix两个参数可以模拟文件夹功能:
如果设置prefix为某个文件夹名称,会列举以此prefix开头的文件,即该文件夹下所有的文件和子文件夹(包括子文件夹下的文件)。
如果再设置delimiter为正斜线(/),则只列举该文件夹下的文件和子文件夹(目录)名称,子文件夹下的文件不会显示。
ObjectIteratorV2(推荐)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
def main():
    """Print the files and immediate subfolders of fun/, without descending."""
    # Access credentials come from environment variables.
    credentials_provider = EnvironmentVariableCredentialsProvider()
    auth = oss2.ProviderAuthV4(credentials_provider)

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    # Files and subfolder names directly under fun; files inside the
    # subfolders are not listed.
    print("列举fun目录下的文件和子目录:")
    entries = oss2.ObjectIteratorV2(
        bucket, prefix='fun/', delimiter='/', start_after='fun/'
    )
    for obj in entries:
        # is_prefix() tells whether the entry is a folder.
        if not obj.is_prefix():
            print(f"文件: {obj.key}")
            continue
        print(f"子目录: {obj.key}")
if __name__ == "__main__":
    main()
预期输出:
列举fun目录下的文件和子目录:
子目录: fun/movie/
文件: fun/test.jpg
ObjectIterator
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
def main():
    """Print the files and immediate subfolders of fun/ using ObjectIterator."""
    # Access credentials come from environment variables.
    credentials_provider = EnvironmentVariableCredentialsProvider()
    auth = oss2.ProviderAuthV4(credentials_provider)

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    # Files and subfolder names directly under fun; files inside the
    # subfolders are not listed.
    print("列举fun目录下的文件和子目录:")
    entries = oss2.ObjectIterator(
        bucket, prefix='fun/', delimiter='/', marker='fun/'
    )
    for obj in entries:
        # is_prefix() tells whether the entry is a folder.
        if not obj.is_prefix():
            print(f"文件: {obj.key}")
            continue
        print(f"子目录: {obj.key}")
if __name__ == "__main__":
    main()
预期输出:
列举fun目录下的文件和子目录:
子目录: fun/movie/
文件: fun/test.jpg
列举指定起始位置后的文件
假设Bucket中包含4个文件,分别为x1.txt、x2.txt、z1.txt和z2.txt。
ObjectIteratorV2(推荐)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
def main():
    """Print the keys of all objects listed after x2.txt."""
    # Access credentials come from environment variables.
    credentials_provider = EnvironmentVariableCredentialsProvider()
    auth = oss2.ProviderAuthV4(credentials_provider)

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    # Everything after x2.txt; x2.txt itself is not included.
    print("列举x2.txt之后的所有文件:")
    later_objects = oss2.ObjectIteratorV2(bucket, start_after="x2.txt")
    for obj in later_objects:
        print(f"文件名: {obj.key}")
if __name__ == "__main__":
    main()
预期输出:
列举x2.txt之后的所有文件:
文件名: z1.txt
文件名: z2.txt
ObjectIterator
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
def main():
    """Print the keys of all objects listed after x2.txt using ObjectIterator."""
    # Access credentials come from environment variables.
    credentials_provider = EnvironmentVariableCredentialsProvider()
    auth = oss2.ProviderAuthV4(credentials_provider)

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    # Everything after x2.txt; x2.txt itself is not included.
    print("列举x2.txt之后的所有文件:")
    later_objects = oss2.ObjectIterator(bucket, marker="x2.txt")
    for obj in later_objects:
        print(f"文件名: {obj.key}")
if __name__ == "__main__":
    main()
预期输出:
列举x2.txt之后的所有文件:
文件名: z1.txt
文件名: z2.txt
获取指定目录下的文件大小
ObjectIteratorV2(推荐)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
def calculate_folder_size(bucket, folder):
    """Return the summed size of every object whose key starts with *folder*.

    Args:
        bucket: The oss2 bucket to list.
        folder: Key prefix identifying the folder.

    Returns:
        The sum of ``obj.size`` over all listed objects (0 if none).
    """
    folder_objects = oss2.ObjectIteratorV2(bucket, prefix=folder)
    return sum(obj.size for obj in folder_objects)
def main():
    """Print the size of each root-level file and the total size of each root-level folder."""
    # Access credentials come from environment variables.
    credentials_provider = EnvironmentVariableCredentialsProvider()
    auth = oss2.ProviderAuthV4(credentials_provider)

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    # Root-level files and folders, each with its size.
    print("列举根目录下的文件和文件夹大小:")
    root_entries = oss2.ObjectIteratorV2(bucket, delimiter='/')
    for obj in root_entries:
        if not obj.is_prefix():
            # A plain file reports its own size.
            print(f"文件: {obj.key}, 大小: {obj.size} 字节")
            continue
        # A folder's size is the total of everything under its prefix.
        folder_size = calculate_folder_size(bucket, obj.key)
        print(f"目录: {obj.key}, 大小: {folder_size} 字节")
if __name__ == "__main__":
    main()
ObjectIterator
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
def calculate_folder_size(bucket, folder):
    """Return the summed size of every object whose key starts with *folder*.

    Args:
        bucket: The oss2 bucket to list.
        folder: Key prefix identifying the folder.

    Returns:
        The sum of ``obj.size`` over all listed objects (0 if none).
    """
    folder_objects = oss2.ObjectIterator(bucket, prefix=folder)
    return sum(obj.size for obj in folder_objects)
def main():
    """Print root-level file sizes and folder totals using ObjectIterator."""
    # Access credentials come from environment variables.
    credentials_provider = EnvironmentVariableCredentialsProvider()
    auth = oss2.ProviderAuthV4(credentials_provider)

    # Endpoint and region of the bucket.
    endpoint = "https://oss-cn-hangzhou.aliyuncs.com"
    region = "cn-hangzhou"

    bucket = oss2.Bucket(auth, endpoint, "example-bucket", region=region)

    # Root-level files and folders, each with its size.
    print("列举根目录下的文件和文件夹大小:")
    root_entries = oss2.ObjectIterator(bucket, delimiter='/')
    for obj in root_entries:
        if not obj.is_prefix():
            # A plain file reports its own size.
            print(f"文件: {obj.key}, 大小: {obj.size} 字节")
            continue
        # A folder's size is the total of everything under its prefix.
        folder_size = calculate_folder_size(bucket, obj.key)
        print(f"目录: {obj.key}, 大小: {folder_size} 字节")
# Run the example only when the file is executed as a script.
if __name__ == "__main__":
    main()