使用Java SDK的SelectObject查询CSV和JSON文件

本文介绍如何使用Java SDKSelectObject查询CSVJSON文件。

说明

本文示例由阿里云用户bin提供,仅供参考。

以下代码用于查询CSV文件和JSON文件:

import com.aliyun.oss.ClientBuilderConfiguration;
import com.aliyun.oss.ClientException;
import com.aliyun.oss.OSS;
import com.aliyun.oss.OSSClientBuilder;
import com.aliyun.oss.common.auth.CredentialsProviderFactory;
import com.aliyun.oss.common.auth.EnvironmentVariableCredentialsProvider;
import com.aliyun.oss.common.comm.SignVersion;
import com.aliyun.oss.model.*;

import java.io.*;
import java.nio.charset.StandardCharsets;

/**
 * 本程序演示如何使用阿里云OSS SDK对CSV和JSON文件进行查询操作。
 * 主要功能包括:
 * 1. 获取CSV文件的元数据信息,包括总行数和分片数量。
 * 2. 查询CSV文件内容,并输出查询结果。
 * 3. 查询简单JSON文件内容,并将结果保存到本地文件。
 * 4. 查询复杂JSON文件内容,并将结果保存到本地文件。
 *
 * 注意事项:
 * - 请确保在环境变量中设置了阿里云的Access Key ID和Access Key Secret。
 * - 请确保OSS Bucket名称和区域配置正确。
 */
public class selectobject {

    private static final String CSV_KEY = "test.csv"; // CSV文件路径
    private static final String ENDPOINT = "http://oss-cn-hangzhou.aliyuncs.com";
    private static final String BUCKET_NAME = "examplebucket"; // Bucket名称
    private static final String REGION = "cn-hangzhou"; // bucket所在地域

    /**
     * 创建OSS客户端实例。
     *
     * @return OSS客户端实例
     */
    private static OSS createOSSClient() throws com.aliyuncs.exceptions.ClientException {
        // 从环境变量中获取凭证
        EnvironmentVariableCredentialsProvider credentialsProvider =  CredentialsProviderFactory.newEnvironmentVariableCredentialsProvider();

        // 使用credentialsProvider进行后续操作...
        ClientBuilderConfiguration clientBuilderConfiguration = new ClientBuilderConfiguration();
        clientBuilderConfiguration.setSignatureVersion(SignVersion.V4);
        return OSSClientBuilder.create()
                .endpoint(ENDPOINT)
                .credentialsProvider(credentialsProvider)
                .clientConfiguration(clientBuilderConfiguration)
                .region(REGION)
                .build();
    }
    /**
     * 程序入口
     */
    public static void main(String[] args) {
        try {
            createCsvSelectObjectMetadata(); // 获取CSV文件元数据
            selectCsv(); // 查询CSV文件内容
            selectSimpleJson(); // 查询简单JSON文件内容
            selectComplexJson(); // 查询复杂JSON文件内容
        } catch (ClientException ex) {
            System.out.println("ClientException: " + ex.getMessage());
        }
    }

    /**
     * 获取CSV文件的元数据信息,包括总行数和分片数量。
     */
    public static void createCsvSelectObjectMetadata() {
        OSS ossClient = null;  // 创建OSS客户端实例
        try {
            ossClient = createOSSClient();
        } catch (com.aliyuncs.exceptions.ClientException e) {
            throw new RuntimeException(e);
        }
        String content = "name,school,company,age\n" +
                "Lora Francis,School A,Staples Inc,27\n" +
                "Eleanor Little,School B,\"Conectiv, Inc\",43\n" +
                "Rosie Hughes,School C,Western Gas Resources Inc,44\n" +
                "Lawrence Ross,School D,MetLife Inc.,24";
        // 将CSV内容上传到OSS
        ossClient.putObject(BUCKET_NAME, CSV_KEY, new ByteArrayInputStream(content.getBytes()));

        // 创建SelectObjectMetadata请求并获取元数据
        SelectObjectMetadata selectObjectMetadata = ossClient.createSelectObjectMetadata(
                new CreateSelectObjectMetadataRequest(BUCKET_NAME, CSV_KEY)
                        .withInputSerialization(
                                new InputSerialization().withCsvInputFormat(
                                        new CSVFormat().withHeaderInfo(CSVFormat.Header.Use)
                                                .withRecordDelimiter("\n"))));

        // 输出CSV文件的总行数和分片数量
        int splits = selectObjectMetadata.getCsvObjectMetadata().getSplits();
        System.out.println("CSV被拆分成了 " + splits + " 份。");
        ossClient.shutdown(); // 关闭OSS客户端
    }

    /**
     * 查询CSV文件内容,并输出查询结果。
     */
    public static void selectCsv() {
        OSS ossClient = null; // 创建OSS客户端实例
        try {
            ossClient = createOSSClient();
        } catch (com.aliyuncs.exceptions.ClientException e) {
            throw new RuntimeException(e);
        }
        try {
            // 创建SelectObjectRequest并设置查询条件
            SelectObjectRequest selectObjectRequest =
                    new SelectObjectRequest(BUCKET_NAME, CSV_KEY)
                            .withInputSerialization(
                                    new InputSerialization().withCsvInputFormat(
                                            new CSVFormat().withHeaderInfo(CSVFormat.Header.Use)
                                                    .withRecordDelimiter("\n")))
                            .withOutputSerialization(new OutputSerialization().withCsvOutputFormat(new CSVFormat()));

            // 设置SQL表达式,查询总记录数和年龄的最大值、最小值
            selectObjectRequest.setExpression("select count(*), max(cast(_4 as int)), min(cast(_4 as int)) from ossobject");

            // 执行查询并读取结果
            OSSObject ossObject = ossClient.selectObject(selectObjectRequest);
            BufferedReader reader = new BufferedReader(new InputStreamReader(ossObject.getObjectContent(), StandardCharsets.UTF_8));
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println("CSV文件的查询结果为:"+line);
            }
        } catch (Exception ex) {
            System.out.println(ex.getMessage());
        } finally {
            ossClient.shutdown(); // 关闭OSS客户端
        }
    }

    /**
     * 查询简单JSON文件内容,并将结果保存到本地文件。
     */
    public static void selectSimpleJson() {
        String key = "simple.json"; // 自定义JSON文件路径
        OSS ossClient = null;   // 创建OSS客户端实例
        try {
            ossClient = createOSSClient();
        } catch (com.aliyuncs.exceptions.ClientException e) {
            throw new RuntimeException(e);
        }
        final String content = "{\n" +
                "\t\"name\": \"Lora Francis\",\n" +
                "\t\"age\": 27,\n" +
                "\t\"company\": \"Staples Inc\"\n" +
                "}\n" +
                "{\n" +
                "\t\"name\": \"Eleanor Little\",\n" +
                "\t\"age\": 43,\n" +
                "\t\"company\": \"Conectiv, Inc\"\n" +
                "}\n" +
                "{\n" +
                "\t\"name\": \"Rosie Hughes\",\n" +
                "\t\"age\": 44,\n" +
                "\t\"company\": \"Western Gas Resources Inc\"\n" +
                "}\n" +
                "{\n" +
                "\t\"name\": \"Lawrence Ross\",\n" +
                "\t\"age\": 24,\n" +
                "\t\"company\": \"MetLife Inc.\"\n" +
                "}";

        // 将JSON内容上传到OSS
        try {
            ossClient.putObject(BUCKET_NAME, key, new ByteArrayInputStream(content.getBytes()));

            // 创建SelectObjectRequest并设置查询条件
            SelectObjectRequest selectObjectRequest =
                    new SelectObjectRequest(BUCKET_NAME, key)
                            .withInputSerialization(new InputSerialization()
                                    .withCompressionType(CompressionType.NONE)
                                    .withJsonInputFormat(new JsonFormat().withJsonType(JsonType.LINES)))
                            .withOutputSerialization(new OutputSerialization()
                                    .withCrcEnabled(true)
                                    .withJsonOutputFormat(new JsonFormat()))
                            .withExpression("select * from ossobject as s where s.age > 40");

            // 执行查询并保存结果到本地文件
            OSSObject ossObject = ossClient.selectObject(selectObjectRequest);
            // 指定保存路径
            String outputPath = "D:\\\\localpath\\\\result.simple.json";
            writeToFile(ossObject.getObjectContent(), outputPath);
            System.out.println("simple json的查询结果已保存到: " + outputPath);
        } catch (Exception ex) {
            System.out.println(ex.getMessage());
        } finally {
            ossClient.shutdown(); // 关闭OSS客户端
        }
    }

    /**
     * 查询复杂JSON文件内容,并将结果保存到本地文件。
     */
    public static void selectComplexJson() {
        String key = "complex.json"; // 自定义JSON文件路径
        OSS ossClient = null; // 创建OSS客户端实例
        try {
            ossClient = createOSSClient();
        } catch (com.aliyuncs.exceptions.ClientException e) {
            throw new RuntimeException(e);
        }
        String content = "{\n" +
                "  \"contacts\":[\n" +
                "{\n" +
                "  \"firstName\": \"John\",\n" +
                "  \"lastName\": \"Smith\",\n" +
                "  \"isAlive\": true,\n" +
                "  \"age\": 27,\n" +
                "  \"address\": {\n" +
                "    \"streetAddress\": \"21 2nd Street\",\n" +
                "    \"city\": \"New York\",\n" +
                "    \"state\": \"NY\",\n" +
                "    \"postalCode\": \"10021-3100\"\n" +
                "  },\n" +
                "  \"phoneNumbers\": [\n" +
                "    {\n" +
                "      \"type\": \"home\",\n" +
                "      \"number\": \"212 555-1234\"\n" +
                "    },\n" +
                "    {\n" +
                "      \"type\": \"office\",\n" +
                "      \"number\": \"646 555-4567\"\n" +
                "    },\n" +
                "    {\n" +
                "      \"type\": \"mobile\",\n" +
                "      \"number\": \"123 456-7890\"\n" +
                "    }\n" +
                "  ],\n" +
                "  \"children\": [],\n" +
                "  \"spouse\": null\n" +
                "}\n" +
                "]}";

        try {
            // 将JSON内容上传到OSS
            ossClient.putObject(BUCKET_NAME, key, new ByteArrayInputStream(content.getBytes()));

            // 创建SelectObjectRequest并设置查询条件
            SelectObjectRequest selectObjectRequest =
                    new SelectObjectRequest(BUCKET_NAME, key)
                            .withInputSerialization(new InputSerialization()
                                    .withCompressionType(CompressionType.NONE)
                                    .withJsonInputFormat(new JsonFormat().withJsonType(JsonType.LINES)))
                            .withOutputSerialization(new OutputSerialization()
                                    .withCrcEnabled(true)
                                    .withJsonOutputFormat(new JsonFormat()))
                            .withExpression("select * from ossobject.contacts[*] s where s.age = 27");

            // 执行查询并保存结果到本地文件
            OSSObject ossObject = ossClient.selectObject(selectObjectRequest);
            // 指定本地保存路径
            String outputPath = "D:\\\\localpath\\\\result.complex.json";
            writeToFile(ossObject.getObjectContent(), outputPath);
            System.out.println("complex json的查询结果已保存到: " + outputPath);
        } catch (Exception ex) {
            System.out.println(ex.getMessage());
        } finally {
            ossClient.shutdown(); // 关闭OSS客户端
        }
    }

    /**
     * 将输入流写入指定的本地文件。
     *
     * @param in   输入流
     * @param file 文件路径
     */
    private static void writeToFile(InputStream in, String file) {
        try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(file))) {
            byte[] buffer = new byte[1024];
            int bytesRead;
            while ((bytesRead = in.read(buffer)) != -1) {
                outputStream.write(buffer, 0, bytesRead);
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}

SelectObject的更多详情,请参考SelectObject