本文介绍如何访问OSS数据。

背景信息

E-MapReduce中,Spark和Hadoop对OSS做了无缝兼容,可以像使用HDFS一样操作OSS文件。根据EMR版本的不同,访问OSS数据支持以下两种方式:

免AccessKey方式访问OSS数据(EMR-3.24.0及后续版本)

[Scala] 
   import org.apache.hadoop.conf.Configuration
   import org.apache.hadoop.fs.{Path, FileSystem}
   // Register the FileSystem implementation class used for the oss:// scheme.
   val conf: Configuration = new Configuration()
   conf.set("fs.oss.impl", "com.aliyun.emr.fs.oss.JindoOssFileSystem")

   // Target OSS directory, addressed like any other Hadoop path.
   val dir: String = "oss://bucket/dir"
   val path: Path = new Path(dir)

   // Resolve the FileSystem for the path's URI, then list the directory's entries.
   val fs: FileSystem = FileSystem.get(path.toUri, conf)
   val fileList = fs.listStatus(path)
   ...
[Java]
   import org.apache.hadoop.conf.Configuration;
   import org.apache.hadoop.fs.Path;
   import org.apache.hadoop.fs.FileStatus;
   import org.apache.hadoop.fs.FileSystem;
   // Register the FileSystem implementation class used for the oss:// scheme.
   Configuration conf = new Configuration();
   conf.set("fs.oss.impl", "com.aliyun.emr.fs.oss.JindoOssFileSystem");

   // Target OSS directory, addressed like any other Hadoop path.
   String dir = "oss://bucket/dir";
   Path path = new Path(dir);

   // Resolve the FileSystem for the path's URI, then list the directory's entries.
   FileSystem fs = FileSystem.get(path.toUri(), conf);
   FileStatus[] fileList = fs.listStatus(path);
   ...

免AccessKey方式访问OSS数据(EMR-3.24.0之前版本)

[Scala] 
   import org.apache.hadoop.conf.Configuration
   import org.apache.hadoop.fs.{Path, FileSystem}
   // Register the FileSystem implementation class used for the oss:// scheme.
   val conf: Configuration = new Configuration()
   conf.set("fs.oss.impl", "com.aliyun.fs.oss.nat.NativeOssFileSystem")

   // Target OSS directory, addressed like any other Hadoop path.
   val dir: String = "oss://bucket/dir"
   val path: Path = new Path(dir)

   // Resolve the FileSystem for the path's URI, then list the directory's entries.
   val fs: FileSystem = FileSystem.get(path.toUri, conf)
   val fileList = fs.listStatus(path)
   ...
[Java]
   import org.apache.hadoop.conf.Configuration;
   import org.apache.hadoop.fs.Path;
   import org.apache.hadoop.fs.FileStatus;
   import org.apache.hadoop.fs.FileSystem;
   // Register the FileSystem implementation class used for the oss:// scheme.
   Configuration conf = new Configuration();
   conf.set("fs.oss.impl", "com.aliyun.fs.oss.nat.NativeOssFileSystem");

   // Target OSS directory, addressed like any other Hadoop path.
   String dir = "oss://bucket/dir";
   Path path = new Path(dir);

   // Resolve the FileSystem for the path's URI, then list the directory's entries.
   FileSystem fs = FileSystem.get(path.toUri(), conf);
   FileStatus[] fileList = fs.listStatus(path);
   ...