diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3URI.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3URI.java index faa5890556..29c3f2700c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3URI.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3URI.java @@ -21,34 +21,70 @@ import org.apache.doris.common.UserException; import com.google.common.base.Strings; import com.google.common.collect.ImmutableSet; -import org.apache.parquet.glob.GlobExpander; +import org.apache.commons.lang3.StringUtils; import java.net.URI; import java.net.URISyntaxException; +import java.util.ArrayList; import java.util.List; +import java.util.Map; +import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; /** * This class represents a fully qualified location in S3 for input/output - * operations expressed as as URI. This implementation is provided to - * ensure compatibility with Hadoop Path implementations that may introduce - * encoding issues with native URI implementation. + * operations expressed as as URI. + *

+ * For AWS S3, uri common styles should be: + * 1. AWS Client Style(Hadoop S3 Style): s3://my-bucket/path/to/file?versionId=abc123&partNumber=77&partNumber=88 + * or + * 2. Virtual Host Style: https://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88 + * or + * 3. Path Style: https://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88 + * + * Regarding the above-mentioned common styles, we can use isPathStyle to control whether to use path style + * or virtual host style. + * "Virtual host style" is the currently mainstream and recommended approach to use, so the default value of + * isPathStyle is false. + * + * Other Styles: + * 1. Virtual Host AWS Client (Hadoop S3) Mixed Style: + * s3://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88 + * or + * 2. Path AWS Client (Hadoop S3) Mixed Style: + * s3://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88 + * + * For these two styles, we can use isPathStyle and forceParsingByStandardUri + * to control whether to use. + * Virtual Host AWS Client (Hadoop S3) Mixed Style: isPathStyle = false && forceParsingByStandardUri = true + * Path AWS Client (Hadoop S3) Mixed Style: isPathStyle = true && forceParsingByStandardUri = true + * + * When the incoming location is url encoded, the encoded string will be returned. + * For getKey(), getQueryParams() will return the encoding string */ public class S3URI { public static final String SCHEME_DELIM = "://"; public static final String PATH_DELIM = "/"; - private static final String QUERY_DELIM = "\\?"; - private static final String FRAGMENT_DELIM = "#"; private static final Set VALID_SCHEMES = ImmutableSet.of("http", "https", "s3", "s3a", "s3n", - "bos", "oss", "cos", "obs"); + "bos", "oss", "cos", "obs"); - private String scheme; - private final String location; - private final String virtualBucket; - private final String bucket; - private final String key; - private boolean forceVirtualHosted; + private static final Set OS_SCHEMES = ImmutableSet.of("s3", "s3a", "s3n", + "bos", "oss", "cos", "obs"); + + private URI uri; + + private String bucket; + private String key; + + private String endpoint; + + private String region; + + private boolean isStandardURL; + private boolean isPathStyle; + private Map> queryParams; /** * Creates a new S3URI based on the bucket and key parsed from the location as defined in: @@ -59,92 +95,180 @@ public class S3URI { * * @param location fully qualified URI */ - public static S3URI create(String location) throws UserException { - return create(location, false); + return create(location, false, false); } - public static S3URI create(String location, boolean forceVirtualHosted) throws UserException { - S3URI s3URI = new S3URI(location, forceVirtualHosted); - return s3URI; + public static S3URI create(String location, boolean isPathStyle) throws UserException { + return new S3URI(location, isPathStyle, false); } - private S3URI(String location, boolean forceVirtualHosted) throws UserException { + public static S3URI create(String location, boolean isPathStyle, boolean forceParsingByStandardUri) + throws UserException { + return new S3URI(location, isPathStyle, forceParsingByStandardUri); + } + + private S3URI(String location, boolean isPathStyle, boolean forceParsingByStandardUri) throws UserException { if (Strings.isNullOrEmpty(location)) { throw new UserException("s3 location can not be null"); } + this.isPathStyle = isPathStyle; + parseUri(location, forceParsingByStandardUri); + } + private void parseUri(String location, boolean forceParsingStandardUri) throws UserException { + validateUri(location); + + if (!forceParsingStandardUri && OS_SCHEMES.contains(uri.getScheme().toLowerCase())) { + parseAwsCliStyleUri(); + } else { + parseStandardUri(); + } + parseEndpointAndRegion(); + } + + private void validateUri(String location) throws UserException { try { - // the location need to be normalized to eliminate double "/", or the hadoop aws api - // won't handle it correctly. - this.location = new URI(location).normalize().toString(); + uri = new URI(location); } catch (URISyntaxException e) { - throw new UserException("Invalid s3 uri: " + e.getMessage()); + throw new UserException(e); + } + if (uri.getScheme() == null || !VALID_SCHEMES.contains(uri.getScheme().toLowerCase())) { + throw new UserException("Invalid scheme: " + this.uri); + } + } + + private void parseAwsCliStyleUri() throws UserException { + bucket = uri.getAuthority(); + if (bucket == null) { + throw new UserException("missing bucket: " + uri); + } + String path = uri.getRawPath(); + if (path.length() > 1) { + key = path.substring(1); + } else { + throw new UserException("missing key: " + uri); } - this.forceVirtualHosted = forceVirtualHosted; - String[] schemeSplit = this.location.split(SCHEME_DELIM); - if (schemeSplit.length != 2) { - throw new UserException("Invalid s3 uri: " + this.location); + addQueryParamsIfNeeded(); + + isStandardURL = false; + this.isPathStyle = false; + } + + private void parseStandardUri() throws UserException { + if (uri.getHost() == null) { + throw new UserException("Invalid S3 URI: no hostname: " + uri); } - this.scheme = schemeSplit[0]; - if (!VALID_SCHEMES.contains(scheme.toLowerCase())) { - throw new UserException("Invalid scheme: " + this.location); - } + addQueryParamsIfNeeded(); - String[] authoritySplit = schemeSplit[1].split(PATH_DELIM, 2); - if (authoritySplit.length != 2) { - throw new UserException("Invalid s3 uri: " + this.location); - } - if (authoritySplit[1].trim().isEmpty()) { - throw new UserException("Invalid s3 key: " + this.location); + if (isPathStyle) { + parsePathStyleUri(); + } else { + parseVirtualHostedStyleUri(); } + isStandardURL = true; + } - // Strip query and fragment if they exist - String path = authoritySplit[1]; - path = path.split(QUERY_DELIM)[0]; - path = path.split(FRAGMENT_DELIM)[0]; - if (this.forceVirtualHosted) { - // If forceVirtualHosted is true, the s3 client will NOT automatically convert to virtual-hosted style. - // So we do some convert manually. Eg: - // endpoint: http://cos.ap-beijing.myqcloud.com - // bucket/path: my_bucket/file.txt - // `virtualBucket` will be "my_bucket" - // `bucket` will be `file.txt` - // So that when assembling the real endpoint will be: http://my_bucket.cos.ap-beijing.myqcloud.com/file.txt - this.virtualBucket = authoritySplit[0]; - String[] paths = path.split("/", 2); - this.bucket = paths[0]; - if (paths.length > 1) { - key = paths[1]; + private void addQueryParamsIfNeeded() { + if (uri.getQuery() != null) { + queryParams = splitQueryString(uri.getRawQuery()).stream().map((s) -> s.split("=")) + .map((s) -> s.length == 1 ? new String[] {s[0], null} : s).collect( + Collectors.groupingBy((a) -> a[0], + Collectors.mapping((a) -> a[1], Collectors.toList()))); + } + } + + private static List splitQueryString(String queryString) { + List results = new ArrayList<>(); + StringBuilder result = new StringBuilder(); + + for (int i = 0; i < queryString.length(); ++i) { + char character = queryString.charAt(i); + if (character != '&') { + result.append(character); } else { - key = ""; + String param = result.toString(); + results.add(param); + result.setLength(0); + } + } + + String param = result.toString(); + results.add(param); + return results; + } + + private void parsePathStyleUri() throws UserException { + String path = uri.getRawPath(); + + if (!StringUtils.isEmpty(path) && !"/".equals(path)) { + int index = path.indexOf('/', 1); + + if (index == -1) { + // No trailing slash, e.g., "https://s3.amazonaws.com/bucket" + bucket = path.substring(1); + throw new UserException("missing key: " + uri); + } else { + bucket = path.substring(1, index); + if (index != path.length() - 1) { + key = path.substring(index + 1); + } else { + throw new UserException("missing key: " + uri); + } } } else { - // If forceVirtualHosted is false, let the s3 client to determine how to covert endpoint, eg: - // For s3 endpoint(start with "s3."), it will convert to virtual-hosted style. - // For others, keep as it is (maybe path-style, maybe virtual-hosted style.) - this.virtualBucket = ""; - this.bucket = authoritySplit[0]; - key = path; + throw new UserException("missing bucket: " + this.uri); } } - public List expand(String path) { - return GlobExpander.expand(path); + private void parseVirtualHostedStyleUri() throws UserException { + bucket = uri.getHost().split("\\.")[0]; + + String path = uri.getRawPath(); + if (!StringUtils.isEmpty(path) && !"/".equals(path)) { + key = path.substring(1); + } else { + throw new UserException("missing key: " + this.uri); + } } - public String getScheme() { - return this.scheme; - } + private void parseEndpointAndRegion() { + // parse endpoint + if (isStandardURL) { + if (isPathStyle) { + endpoint = uri.getAuthority(); + } else { // virtual_host_style + if (uri.getAuthority() == null) { + endpoint = null; + return; + } + String[] splits = uri.getAuthority().split("\\.", 2); + if (splits.length < 2) { + endpoint = null; + return; + } + endpoint = splits[1]; + } + } else { + endpoint = null; + } + if (endpoint == null) { + return; + } - public String getBucketScheme() { - return scheme + "://" + bucket; - } - - public String getVirtualBucket() { - return virtualBucket; + // parse region + String[] endpointSplits = endpoint.split("\\."); + if (endpointSplits.length < 2) { + return; + } + if (endpointSplits[0].contains("oss-")) { + // compatible with the endpoint: oss-cn-bejing.aliyuncs.com + region = endpointSplits[0]; + return; + } + region = endpointSplits[1]; } /** @@ -161,15 +285,30 @@ public class S3URI { return key; } - /* - * @return original, unmodified location - */ - public String getLocation() { - return location; + public Optional>> getQueryParams() { + return Optional.ofNullable(queryParams); + } + + public Optional getEndpoint() { + return Optional.ofNullable(endpoint); + } + + public Optional getRegion() { + return Optional.ofNullable(region); } @Override public String toString() { - return location; + final StringBuilder sb = new StringBuilder("S3URI{"); + sb.append("uri=").append(uri); + sb.append(", bucket='").append(bucket).append('\''); + sb.append(", key='").append(key).append('\''); + sb.append(", endpoint='").append(endpoint).append('\''); + sb.append(", region='").append(region).append('\''); + sb.append(", isStandardURL=").append(isStandardURL); + sb.append(", isPathStyle=").append(isPathStyle); + sb.append(", queryParams=").append(queryParams); + sb.append('}'); + return sb.toString(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java index 0195783191..57b53627c6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java @@ -45,7 +45,8 @@ import java.time.Duration; public class S3Util { - public static S3Client buildS3Client(URI endpoint, String region, CloudCredential credential) { + public static S3Client buildS3Client(URI endpoint, String region, CloudCredential credential, + boolean isUsePathStyle) { AwsCredentialsProvider scp; AwsCredentials awsCredential; if (!credential.isTemporary()) { @@ -89,10 +90,9 @@ public class S3Util { .region(Region.of(region)) .overrideConfiguration(clientConf) // disable chunkedEncoding because of bos not supported - // use virtual hosted-style access .serviceConfiguration(S3Configuration.builder() .chunkedEncodingEnabled(false) - .pathStyleAccessEnabled(true) + .pathStyleAccessEnabled(isUsePathStyle) .build()) .build(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/dlf/DLFCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/dlf/DLFCatalog.java index 24f2df5acd..ca5ccd5f35 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/dlf/DLFCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/dlf/DLFCatalog.java @@ -21,6 +21,7 @@ import org.apache.doris.common.util.S3Util; import org.apache.doris.datasource.credentials.CloudCredential; import org.apache.doris.datasource.iceberg.HiveCompatibleCatalog; import org.apache.doris.datasource.iceberg.dlf.client.DLFCachedClientPool; +import org.apache.doris.datasource.property.PropertyConverter; import org.apache.doris.datasource.property.constants.OssProperties; import org.apache.doris.datasource.property.constants.S3Properties; @@ -53,19 +54,21 @@ public class DLFCatalog extends HiveCompatibleCatalog { String endpoint = properties.getOrDefault(Constants.ENDPOINT_KEY, properties.get(S3Properties.Env.ENDPOINT)); CloudCredential credential = new CloudCredential(); credential.setAccessKey(properties.getOrDefault(OssProperties.ACCESS_KEY, - properties.get(S3Properties.Env.ACCESS_KEY))); + properties.get(S3Properties.Env.ACCESS_KEY))); credential.setSecretKey(properties.getOrDefault(OssProperties.SECRET_KEY, - properties.get(S3Properties.Env.SECRET_KEY))); + properties.get(S3Properties.Env.SECRET_KEY))); if (properties.containsKey(OssProperties.SESSION_TOKEN) || properties.containsKey(S3Properties.Env.TOKEN)) { credential.setSessionToken(properties.getOrDefault(OssProperties.SESSION_TOKEN, properties.get(S3Properties.Env.TOKEN))); } String region = properties.getOrDefault(OssProperties.REGION, properties.get(S3Properties.Env.REGION)); + boolean isUsePathStyle = properties.getOrDefault(PropertyConverter.USE_PATH_STYLE, "false") + .equalsIgnoreCase("true"); // s3 file io just supports s3-like endpoint String s3Endpoint = endpoint.replace(region, "s3." + region); URI endpointUri = URI.create(s3Endpoint); - FileIO io = new S3FileIO(() -> S3Util.buildS3Client(endpointUri, region, credential)); + FileIO io = new S3FileIO(() -> S3Util.buildS3Client(endpointUri, region, credential, isUsePathStyle)); io.initialize(properties); return io; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java index b385fb838b..bccd3147a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java @@ -58,6 +58,9 @@ public class PropertyConverter { private static final Logger LOG = LogManager.getLogger(PropertyConverter.class); public static final String USE_PATH_STYLE = "use_path_style"; + public static final String USE_PATH_STYLE_DEFAULT_VALUE = "false"; + public static final String FORCE_PARSING_BY_STANDARD_URI = "force_parsing_by_standard_uri"; + public static final String FORCE_PARSING_BY_STANDARD_URI_DEFAULT_VALUE = "false"; /** * Convert properties defined at doris to metadata properties on Cloud diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/ObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/ObjStorage.java index b964e3022a..3c4246d0fe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/ObjStorage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/ObjStorage.java @@ -32,7 +32,7 @@ import java.io.File; * @param cloud SDK Client */ public interface ObjStorage { - C getClient(String bucket) throws UserException; + C getClient() throws UserException; Triple getStsToken() throws DdlException; diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java index d1e8e74b49..11ec72923d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java @@ -29,7 +29,6 @@ import org.apache.doris.datasource.property.constants.S3Properties; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Triple; import org.apache.http.HttpStatus; -import org.apache.http.client.utils.URIBuilder; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; @@ -68,15 +67,9 @@ public class S3ObjStorage implements ObjStorage { protected Map properties; - // false: the s3 client will automatically convert endpoint to virtual-hosted style, eg: - // endpoint: http://s3.us-east-2.amazonaws.com - // bucket/path: my_bucket/file.txt - // auto convert: http://my_bucket.s3.us-east-2.amazonaws.com/file.txt - // true: the s3 client will NOT automatically convert endpoint to virtual-hosted style, we need to do some tricks: - // endpoint: http://cos.ap-beijing.myqcloud.com - // bucket/path: my_bucket/file.txt - // convert manually: See S3URI() - private boolean forceHostedStyle = false; + private boolean isUsePathStyle = false; + + private boolean forceParsingByStandardUri = false; public S3ObjStorage(Map properties) { this.properties = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); @@ -104,38 +97,27 @@ public class S3ObjStorage implements ObjStorage { // Some of them, such as aliyun's oss, only support virtual hosted-style, // and some of them(ceph) may only support // path-style, so we need to do some additional conversion. - // - // use_path_style | !use_path_style - // S3 forceHostedStyle=false | forceHostedStyle=false - // !S3 forceHostedStyle=false | forceHostedStyle=true - // - // That is, for S3 endpoint, ignore the `use_path_style` property, and the s3 client will automatically use - // virtual hosted-sytle. - // And for other endpoint, if `use_path_style` is true, use path style. Otherwise, use virtual hosted-sytle. - // 'forceHostedStyle==false' means that use path style. - if (!this.properties.get(S3Properties.ENDPOINT).toLowerCase().contains(S3Properties.S3_PREFIX)) { - String usePathStyle = this.properties.getOrDefault(PropertyConverter.USE_PATH_STYLE, "false"); - boolean isUsePathStyle = usePathStyle.equalsIgnoreCase("true"); - // when it's path style, we will not use virtual hosted-style - forceHostedStyle = !isUsePathStyle; - } else { - forceHostedStyle = false; - } + isUsePathStyle = this.properties.getOrDefault(PropertyConverter.USE_PATH_STYLE, "false") + .equalsIgnoreCase("true"); + forceParsingByStandardUri = this.properties.getOrDefault(PropertyConverter.FORCE_PARSING_BY_STANDARD_URI, + "false").equalsIgnoreCase("true"); } @Override - public S3Client getClient(String bucket) throws UserException { + public S3Client getClient() throws UserException { if (client == null) { - URI tmpEndpoint = URI.create(properties.get(S3Properties.ENDPOINT)); - URI endpoint = StringUtils.isEmpty(bucket) ? tmpEndpoint : - URI.create(new URIBuilder(tmpEndpoint).setHost(bucket + "." + tmpEndpoint.getHost()).toString()); + String endpointStr = properties.get(S3Properties.ENDPOINT); + if (!endpointStr.contains("://")) { + endpointStr = "http://" + endpointStr; + } + URI endpoint = URI.create(endpointStr); CloudCredential credential = new CloudCredential(); credential.setAccessKey(properties.get(S3Properties.ACCESS_KEY)); credential.setSecretKey(properties.get(S3Properties.SECRET_KEY)); if (properties.containsKey(S3Properties.SESSION_TOKEN)) { credential.setSessionToken(properties.get(S3Properties.SESSION_TOKEN)); } - client = S3Util.buildS3Client(endpoint, properties.get(S3Properties.REGION), credential); + client = S3Util.buildS3Client(endpoint, properties.get(S3Properties.REGION), credential, isUsePathStyle); } return client; } @@ -148,8 +130,8 @@ public class S3ObjStorage implements ObjStorage { @Override public Status headObject(String remotePath) { try { - S3URI uri = S3URI.create(remotePath, forceHostedStyle); - HeadObjectResponse response = getClient(uri.getVirtualBucket()) + S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); + HeadObjectResponse response = getClient() .headObject(HeadObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build()); LOG.info("head file " + remotePath + " success: " + response.toString()); return Status.OK; @@ -169,8 +151,8 @@ public class S3ObjStorage implements ObjStorage { @Override public Status getObject(String remoteFilePath, File localFile) { try { - S3URI uri = S3URI.create(remoteFilePath, forceHostedStyle); - GetObjectResponse response = getClient(uri.getVirtualBucket()).getObject( + S3URI uri = S3URI.create(remoteFilePath, isUsePathStyle, forceParsingByStandardUri); + GetObjectResponse response = getClient().getObject( GetObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build(), localFile.toPath()); LOG.info("get file " + remoteFilePath + " success: " + response.toString()); return Status.OK; @@ -189,9 +171,9 @@ public class S3ObjStorage implements ObjStorage { @Override public Status putObject(String remotePath, @Nullable RequestBody requestBody) { try { - S3URI uri = S3URI.create(remotePath, forceHostedStyle); + S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); PutObjectResponse response = - getClient(uri.getVirtualBucket()) + getClient() .putObject( PutObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build(), requestBody); @@ -209,9 +191,9 @@ public class S3ObjStorage implements ObjStorage { @Override public Status deleteObject(String remotePath) { try { - S3URI uri = S3URI.create(remotePath, forceHostedStyle); + S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); DeleteObjectResponse response = - getClient(uri.getVirtualBucket()) + getClient() .deleteObject( DeleteObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build()); LOG.info("delete file " + remotePath + " success: " + response.toString()); @@ -231,7 +213,7 @@ public class S3ObjStorage implements ObjStorage { @Override public Status deleteObjects(String absolutePath) { try { - S3URI baseUri = S3URI.create(absolutePath, forceHostedStyle); + S3URI baseUri = S3URI.create(absolutePath, isUsePathStyle, forceParsingByStandardUri); String continuationToken = ""; boolean isTruncated = false; long totalObjects = 0; @@ -250,7 +232,7 @@ public class S3ObjStorage implements ObjStorage { .delete(delete) .build(); - DeleteObjectsResponse resp = getClient(baseUri.getVirtualBucket()).deleteObjects(req); + DeleteObjectsResponse resp = getClient().deleteObjects(req); if (resp.errors().size() > 0) { LOG.warn("{} errors returned while deleting {} objects for dir {}", resp.errors().size(), objectList.size(), absolutePath); @@ -268,7 +250,7 @@ public class S3ObjStorage implements ObjStorage { } catch (DdlException e) { return new Status(Status.ErrCode.COMMON_ERROR, "list objects for delete objects failed: " + e.getMessage()); } catch (Exception e) { - LOG.warn("delete objects {} failed, force visual host style {}", absolutePath, e, forceHostedStyle); + LOG.warn(String.format("delete objects %s failed", absolutePath), e); return new Status(Status.ErrCode.COMMON_ERROR, "delete objects failed: " + e.getMessage()); } } @@ -276,9 +258,9 @@ public class S3ObjStorage implements ObjStorage { @Override public Status copyObject(String origFilePath, String destFilePath) { try { - S3URI origUri = S3URI.create(origFilePath); - S3URI descUri = S3URI.create(destFilePath, forceHostedStyle); - CopyObjectResponse response = getClient(descUri.getVirtualBucket()) + S3URI origUri = S3URI.create(origFilePath, isUsePathStyle, forceParsingByStandardUri); + S3URI descUri = S3URI.create(destFilePath, isUsePathStyle, forceParsingByStandardUri); + CopyObjectResponse response = getClient() .copyObject( CopyObjectRequest.builder() .copySource(origUri.getBucket() + "/" + origUri.getKey()) @@ -299,31 +281,16 @@ public class S3ObjStorage implements ObjStorage { @Override public RemoteObjects listObjects(String absolutePath, String continuationToken) throws DdlException { try { - S3URI uri = S3URI.create(absolutePath, forceHostedStyle); + S3URI uri = S3URI.create(absolutePath, isUsePathStyle, forceParsingByStandardUri); String bucket = uri.getBucket(); String prefix = uri.getKey(); - if (!StringUtils.isEmpty(uri.getVirtualBucket())) { - // Support s3 compatible service. The generated HTTP request for list objects likes: - // - // GET /?list-type=2&prefix= - prefix = bucket + "/" + prefix; - String endpoint = properties.get(S3Properties.ENDPOINT); - if (endpoint.contains("cos.")) { - bucket = "/"; - } else if (endpoint.contains("oss-")) { - bucket = uri.getVirtualBucket(); - } else if (endpoint.contains("obs.")) { - // FIXME: unlike cos and oss, the obs will report 'The specified key does not exist'. - throw new DdlException("obs does not support list objects via s3 sdk. path: " + absolutePath); - } - } ListObjectsV2Request.Builder requestBuilder = ListObjectsV2Request.builder() .bucket(bucket) .prefix(normalizePrefix(prefix)); if (!StringUtils.isEmpty(continuationToken)) { requestBuilder.continuationToken(continuationToken); } - ListObjectsV2Response response = getClient(uri.getVirtualBucket()).listObjectsV2(requestBuilder.build()); + ListObjectsV2Response response = getClient().listObjectsV2(requestBuilder.build()); List remoteObjects = new ArrayList<>(); for (S3Object c : response.contents()) { String relativePath = getRelativePath(prefix, c.key()); @@ -331,7 +298,7 @@ public class S3ObjStorage implements ObjStorage { } return new RemoteObjects(remoteObjects, response.isTruncated(), response.nextContinuationToken()); } catch (Exception e) { - LOG.warn("Failed to list objects for S3: {}", absolutePath, e); + LOG.warn(String.format("Failed to list objects for S3: %s", absolutePath), e); throw new DdlException("Failed to list objects for S3, Error message: " + e.getMessage(), e); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java index 6141222246..44cbd48226 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java @@ -54,8 +54,6 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction { ImmutableSet.of("access_key", "secret_key", "session_token", "region", "ACCESS_KEY", "SECRET_KEY", "SESSION_TOKEN", "REGION"); - private String virtualBucket = ""; - public S3TableValuedFunction(Map properties) throws AnalysisException { // 1. analyze common properties Map otherProps = super.parseCommonProperties(properties); @@ -67,13 +65,20 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction { } forwardCompatibleDeprecatedKeys(otherProps); - String usePathStyle = getOrDefaultAndRemove(otherProps, PropertyConverter.USE_PATH_STYLE, "false"); - boolean forceVirtualHosted = isVirtualHosted(uriStr, Boolean.parseBoolean(usePathStyle)); - S3URI s3uri = getS3Uri(uriStr, forceVirtualHosted); - String endpoint = forceVirtualHosted - ? getEndpointAndSetVirtualBucket(s3uri, otherProps) : s3uri.getBucketScheme(); + String usePathStyle = getOrDefaultAndRemove(otherProps, PropertyConverter.USE_PATH_STYLE, + PropertyConverter.USE_PATH_STYLE_DEFAULT_VALUE); + String forceParsingByStandardUri = getOrDefaultAndRemove(otherProps, + PropertyConverter.FORCE_PARSING_BY_STANDARD_URI, + PropertyConverter.FORCE_PARSING_BY_STANDARD_URI_DEFAULT_VALUE); + + S3URI s3uri = getS3Uri(uriStr, Boolean.parseBoolean(usePathStyle.toLowerCase()), + Boolean.parseBoolean(forceParsingByStandardUri.toLowerCase())); + String endpoint = otherProps.containsKey(S3Properties.ENDPOINT) ? otherProps.get(S3Properties.ENDPOINT) : + s3uri.getEndpoint().orElseThrow(() -> + new AnalysisException(String.format("Properties '%s' is required.", S3Properties.ENDPOINT))); if (!otherProps.containsKey(S3Properties.REGION)) { - String region = S3Properties.getRegionOfEndpoint(endpoint); + String region = s3uri.getRegion().orElseThrow(() -> + new AnalysisException(String.format("Properties '%s' is required.", S3Properties.REGION))); otherProps.put(S3Properties.REGION, region); } checkNecessaryS3Properties(otherProps); @@ -89,12 +94,7 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction { locationProperties.put(PropertyConverter.USE_PATH_STYLE, usePathStyle); locationProperties.putAll(S3ClientBEProperties.getBeFSProperties(locationProperties)); - if (forceVirtualHosted) { - filePath = NAME + S3URI.SCHEME_DELIM + virtualBucket + S3URI.PATH_DELIM - + s3uri.getBucket() + S3URI.PATH_DELIM + s3uri.getKey(); - } else { - filePath = NAME + S3URI.SCHEME_DELIM + s3uri.getKey(); - } + filePath = NAME + S3URI.SCHEME_DELIM + s3uri.getBucket() + S3URI.PATH_DELIM + s3uri.getKey(); if (FeConstants.runningUnitTest) { // Just check @@ -120,36 +120,9 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction { // do not check ak and sk, because we can read them from system environment. } - private String getEndpointAndSetVirtualBucket(S3URI s3uri, Map props) - throws AnalysisException { - String[] fields = s3uri.getVirtualBucket().split("\\.", 2); - virtualBucket = fields[0]; - if (fields.length > 1) { - // At this point, s3uri.getVirtualBucket() is: virtualBucket.endpoint, Eg: - // uri: http://my_bucket.cos.ap-beijing.myqcloud.com/file.txt - // s3uri.getVirtualBucket() = my_bucket.cos.ap-beijing.myqcloud.com, - // so we need separate virtualBucket and endpoint. - return fields[1]; - } else if (props.containsKey(S3Properties.ENDPOINT)) { - return props.get(S3Properties.ENDPOINT); - } else { - throw new AnalysisException("can not parse endpoint, please check uri."); - } - } - - private boolean isVirtualHosted(String uri, boolean usePathStyle) { - if (uri.toLowerCase().startsWith("s3")) { - // s3 protocol, default virtual-hosted style - return true; - } else { - // not s3 protocol, forceVirtualHosted is determined by USE_PATH_STYLE. - return !usePathStyle; - } - } - - private S3URI getS3Uri(String uri, boolean forceVirtualHosted) throws AnalysisException { + private S3URI getS3Uri(String uri, boolean isPathStyle, boolean forceParsingStandardUri) throws AnalysisException { try { - return S3URI.create(uri, forceVirtualHosted); + return S3URI.create(uri, isPathStyle, forceParsingStandardUri); } catch (UserException e) { throw new AnalysisException("parse s3 uri failed, uri = " + uri, e); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3URITest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3URITest.java index 383926d884..1d92158c9c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3URITest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3URITest.java @@ -22,35 +22,157 @@ import org.apache.doris.common.UserException; import org.junit.Assert; import org.junit.Test; +import java.util.Optional; + public class S3URITest { @Test public void testLocationParsing() throws UserException { - String p1 = "s3://bucket/path/to/file"; - S3URI uri1 = S3URI.create(p1); + String p1 = "s3://my-bucket/path/to/file"; + boolean isPathStyle = false; + boolean forceParsingStandardUri = false; + S3URI uri1 = S3URI.create(p1, isPathStyle, forceParsingStandardUri); - Assert.assertEquals("bucket", uri1.getBucket()); + Assert.assertEquals("my-bucket", uri1.getBucket()); Assert.assertEquals("path/to/file", uri1.getKey()); - Assert.assertEquals(p1, uri1.toString()); + Assert.assertEquals(Optional.empty(), uri1.getRegion()); + Assert.assertEquals(Optional.empty(), uri1.getEndpoint()); + Assert.assertEquals(Optional.empty(), uri1.getQueryParams()); } @Test - public void testPathLocationParsing() throws UserException { - String p1 = "s3://bucket/path/"; - S3URI uri1 = S3URI.create(p1); + public void testVirtualHostStyleParsing() throws UserException { + String p1 = "https://my-bucket.s3.us-west-1.amazonaws.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88"; + boolean isPathStyle = false; + boolean forceParsingStandardUri = false; + S3URI uri1 = S3URI.create(p1, isPathStyle, forceParsingStandardUri); - Assert.assertEquals("bucket", uri1.getBucket()); - Assert.assertEquals("path/", uri1.getKey()); - Assert.assertEquals(p1, uri1.toString()); + Assert.assertEquals("my-bucket", uri1.getBucket()); + Assert.assertEquals("resources/doc.txt", uri1.getKey()); + Assert.assertEquals("s3.us-west-1.amazonaws.com", uri1.getEndpoint().get()); + Assert.assertEquals("us-west-1", uri1.getRegion().get()); + Assert.assertEquals("abc123", uri1.getQueryParams().get().get("versionId").get(0)); + Assert.assertEquals(2, uri1.getQueryParams().get().get("partNumber").size()); + Assert.assertTrue(uri1.getQueryParams().get().get("partNumber").contains("77")); + Assert.assertTrue(uri1.getQueryParams().get().get("partNumber").contains("88")); + } + + @Test + public void testPathStyleParsing() throws UserException { + String p1 = "https://s3.us-west-1.amazonaws.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88"; + boolean isPathStyle = true; + boolean forceParsingStandardUri = false; + S3URI uri1 = S3URI.create(p1, isPathStyle, forceParsingStandardUri); + + Assert.assertEquals("my-bucket", uri1.getBucket()); + Assert.assertEquals("resources/doc.txt", uri1.getKey()); + Assert.assertEquals("s3.us-west-1.amazonaws.com", uri1.getEndpoint().get()); + Assert.assertEquals("us-west-1", uri1.getRegion().get()); + Assert.assertEquals("abc123", uri1.getQueryParams().get().get("versionId").get(0)); + Assert.assertEquals(2, uri1.getQueryParams().get().get("partNumber").size()); + Assert.assertTrue(uri1.getQueryParams().get().get("partNumber").contains("77")); + Assert.assertTrue(uri1.getQueryParams().get().get("partNumber").contains("88")); + } + + @Test + public void testForceParsingStandardUri() throws UserException { + String p1 = "s3://my-bucket.s3.us-west-1.amazonaws.com/path/to/file"; + S3URI uri1 = S3URI.create(p1, false, true); + + Assert.assertEquals("my-bucket", uri1.getBucket()); + Assert.assertEquals("path/to/file", uri1.getKey()); + Assert.assertEquals("s3.us-west-1.amazonaws.com", uri1.getEndpoint().get()); + Assert.assertEquals("us-west-1", uri1.getRegion().get()); + Assert.assertEquals(Optional.empty(), uri1.getQueryParams()); + + String p2 = "s3://s3.us-west-1.amazonaws.com/my-bucket/path/to/file"; + S3URI uri2 = S3URI.create(p2, true, true); + + Assert.assertEquals("my-bucket", uri2.getBucket()); + Assert.assertEquals("path/to/file", uri2.getKey()); + Assert.assertEquals("s3.us-west-1.amazonaws.com", uri2.getEndpoint().get()); + Assert.assertEquals(Optional.empty(), uri1.getQueryParams()); + } + + @Test + public void testOSSVirtualHostStyle() throws UserException { + String p1 = "https://my-bucket.oss-cn-bejing.aliyuncs.com/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88"; + boolean isPathStyle = false; + boolean forceParsingStandardUri = false; + S3URI uri1 = S3URI.create(p1, isPathStyle, forceParsingStandardUri); + + Assert.assertEquals("my-bucket", uri1.getBucket()); + Assert.assertEquals("resources/doc.txt", uri1.getKey()); + Assert.assertEquals("oss-cn-bejing.aliyuncs.com", uri1.getEndpoint().get()); + Assert.assertEquals("oss-cn-bejing", uri1.getRegion().get()); + Assert.assertEquals("abc123", uri1.getQueryParams().get().get("versionId").get(0)); + Assert.assertEquals(2, uri1.getQueryParams().get().get("partNumber").size()); + Assert.assertTrue(uri1.getQueryParams().get().get("partNumber").contains("77")); + Assert.assertTrue(uri1.getQueryParams().get().get("partNumber").contains("88")); + } + + @Test + public void testOSSPathStyle() throws UserException { + String p1 = "https://oss-cn-bejing.aliyuncs.com/my-bucket/resources/doc.txt?versionId=abc123&partNumber=77&partNumber=88"; + boolean isPathStyle = true; + boolean forceParsingStandardUri = false; + S3URI uri1 = S3URI.create(p1, isPathStyle, forceParsingStandardUri); + + Assert.assertEquals("my-bucket", uri1.getBucket()); + Assert.assertEquals("resources/doc.txt", uri1.getKey()); + Assert.assertEquals("oss-cn-bejing.aliyuncs.com", uri1.getEndpoint().get()); + Assert.assertEquals("oss-cn-bejing", uri1.getRegion().get()); + Assert.assertEquals("abc123", uri1.getQueryParams().get().get("versionId").get(0)); + Assert.assertEquals(2, uri1.getQueryParams().get().get("partNumber").size()); + Assert.assertTrue(uri1.getQueryParams().get().get("partNumber").contains("77")); + Assert.assertTrue(uri1.getQueryParams().get().get("partNumber").contains("88")); + } + + @Test + public void testCOSVirtualHostStyle() throws UserException { + String p1 = "https://my-bucket.cos.ap-beijing.myqcloud.com/resources/doc.txt"; + boolean isPathStyle = false; + boolean forceParsingStandardUri = false; + S3URI uri1 = S3URI.create(p1, isPathStyle, forceParsingStandardUri); + + Assert.assertEquals("my-bucket", uri1.getBucket()); + Assert.assertEquals("resources/doc.txt", uri1.getKey()); + Assert.assertEquals("cos.ap-beijing.myqcloud.com", uri1.getEndpoint().get()); + Assert.assertEquals("ap-beijing", uri1.getRegion().get()); + } + + @Test + public void testOBSVirtualHostStyle() throws UserException { + String p1 = "https://my-bucket.obs.cn-north-4.myhuaweicloud.com/test_obs/000000_0"; + boolean isPathStyle = false; + boolean forceParsingStandardUri = false; + S3URI uri1 = S3URI.create(p1, isPathStyle, forceParsingStandardUri); + + Assert.assertEquals("my-bucket", uri1.getBucket()); + Assert.assertEquals("test_obs/000000_0", uri1.getKey()); + Assert.assertEquals("obs.cn-north-4.myhuaweicloud.com", uri1.getEndpoint().get()); + Assert.assertEquals("cn-north-4", uri1.getRegion().get()); } @Test public void testEncodedString() throws UserException { - String p1 = "s3://bucket/path%20to%20file"; - S3URI uri1 = S3URI.create(p1); + String p1 = "s3://bucket/path%20to%20file?txt=hello%20world&partNumber=77&partNumber=88"; + boolean isPathStyle = false; + boolean forceParsingStandardUri = false; + S3URI uri1 = S3URI.create(p1, isPathStyle, forceParsingStandardUri); Assert.assertEquals("bucket", uri1.getBucket()); Assert.assertEquals("path%20to%20file", uri1.getKey()); - Assert.assertEquals(p1, uri1.toString()); + Assert.assertEquals(Optional.empty(), uri1.getEndpoint()); + Assert.assertEquals(Optional.empty(), uri1.getRegion()); + Assert.assertEquals("hello%20world", uri1.getQueryParams().get().get("txt").get(0)); + Assert.assertEquals(2, uri1.getQueryParams().get().get("partNumber").size()); + Assert.assertTrue(uri1.getQueryParams().get().get("partNumber").contains("77")); + Assert.assertTrue(uri1.getQueryParams().get().get("partNumber").contains("88")); + } + + @Test(expected = UserException.class) + public void missingBucket() throws UserException { + S3URI.create("https:///"); } @Test(expected = UserException.class) @@ -75,6 +197,9 @@ public class S3URITest { Assert.assertEquals("bucket", uri1.getBucket()); Assert.assertEquals("path/to/file", uri1.getKey()); - Assert.assertEquals(p1, uri1.toString()); + Assert.assertEquals(Optional.empty(), uri1.getEndpoint()); + Assert.assertEquals(Optional.empty(), uri1.getRegion()); + Assert.assertEquals("foo", uri1.getQueryParams().get().get("query").get(0)); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java index 4c4f524621..9050035f7c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java @@ -225,7 +225,7 @@ public class PropertyConverterTest extends TestWithFeService { public void testS3TVFPropertiesConverter() throws Exception { FeConstants.runningUnitTest = true; String queryOld = "select * from s3(\n" - + " 'uri' = 'http://s3.us-east-1.amazonaws.com/test.parquet',\n" + + " 'uri' = 'http://s3.us-east-1.amazonaws.com/my-bucket/test.parquet',\n" + " 'access_key' = 'akk',\n" + " 'secret_key' = 'skk',\n" + " 'region' = 'us-east-1',\n" @@ -239,7 +239,7 @@ public class PropertyConverterTest extends TestWithFeService { Assertions.assertEquals(s3Tvf.getBrokerDesc().getProperties().size(), 9); String queryNew = "select * from s3(\n" - + " 'uri' = 'http://s3.us-east-1.amazonaws.com/test.parquet',\n" + + " 'uri' = 'http://s3.us-east-1.amazonaws.com/my-bucket/test.parquet',\n" + " 's3.access_key' = 'akk',\n" + " 's3.secret_key' = 'skk',\n" + " 'format' = 'parquet',\n" diff --git a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3FileSystemTest.java b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3FileSystemTest.java index d5983bb3ab..442883573c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3FileSystemTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3FileSystemTest.java @@ -98,12 +98,12 @@ public class S3FileSystemTest { mockedClient.setMockedData(content.getBytes()); new MockUp(S3ObjStorage.class) { @Mock - public S3Client getClient(String bucket) throws UserException { + public S3Client getClient() throws UserException { return mockedClient; } }; S3ObjStorage mockedStorage = new S3ObjStorage(properties); - Assertions.assertTrue(mockedStorage.getClient("mocked") instanceof MockedS3Client); + Assertions.assertTrue(mockedStorage.getClient() instanceof MockedS3Client); // inject storage to file system. fileSystem = new S3FileSystem(mockedStorage); new MockUp(S3FileSystem.class) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageTest.java b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageTest.java index c4dce56c57..f5995cc9b2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageTest.java @@ -19,7 +19,6 @@ package org.apache.doris.fs.obj; import org.apache.doris.backup.Status; import org.apache.doris.common.UserException; -import org.apache.doris.common.util.S3URI; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.Assertions; @@ -113,14 +112,7 @@ class S3ObjStorageTest { client.setAccessible(true); MockedS3Client mockedClient = new MockedS3Client(); client.set(storage, mockedClient); - Assertions.assertTrue(storage.getClient("mocked") instanceof MockedS3Client); - - S3URI vUri = S3URI.create("s3://bucket/key", true); - S3URI uri = S3URI.create("s3://bucket/key", false); - Assertions.assertEquals(vUri.getVirtualBucket(), "bucket"); - Assertions.assertEquals(vUri.getBucket(), "key"); - Assertions.assertEquals(uri.getVirtualBucket(), ""); - Assertions.assertEquals(uri.getBucket(), "bucket"); + Assertions.assertTrue(storage.getClient() instanceof MockedS3Client); Status st = storage.headObject("s3://bucket/key"); Assertions.assertEquals(Status.OK, st); diff --git a/regression-test/suites/load_p0/tvf/test_s3_tvf.groovy b/regression-test/suites/load_p0/tvf/test_s3_tvf.groovy index 391b76d37e..83a93fd5d8 100644 --- a/regression-test/suites/load_p0/tvf/test_s3_tvf.groovy +++ b/regression-test/suites/load_p0/tvf/test_s3_tvf.groovy @@ -56,40 +56,46 @@ suite("test_s3_tvf", "load_p0") { attributeList.add(new TvfAttribute(table, ["K00", "K01", "K02", "K03", "K04", "K05", "K06", "K07", "K08", "K09", "K10", "K11", "K12", "K13", "K14", "K15", "K16", "K17", "K18"], "", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } attributeList.add(new TvfAttribute("agg_tbl_basic_tvf", "c1 as k00,c2 as k01,c3 as k02,c4 as k03,c5 as k04,c6 as k05,c7 as k06,c8 as k07,c9 as k08,c10 as k09,c11 as k10,c12 as k11,c13 as k12,c14 as k13,c15 as k14,c16 as k15,c17 as k16,c18 as k17,c19 as k18, to_bitmap(c6) as k19, HLL_HASH(c6) as k20, TO_QUANTILE_STATE(c5, 1.0) as k21, to_bitmap(c6) as kd19, HLL_HASH(c6) as kd20, TO_QUANTILE_STATE(c5, 1.0) as kd21", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18,k19,k20,k21,kd19,kd20,kd21" ,"", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) for(String table : arrayTables) { attributeList.add(new TvfAttribute(table, ["K00", "K01", "K02", "K03", "K04", "K05", "K06", "K07", "K08", "K09", "K10", "K11", "K12", "K13", "K14", "K15", "K16", "K17"], "", "").addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_array_data.csv") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_array_data.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } for(String table : basicTables) { attributeList.add(new TvfAttribute(table, ["k00", "k01", "k02", "k03", "k04", "k05", "k06", "k07", "k08", "k09", "k10", "k11", "k12", "k13", "k14", "k15", "k16", "k17", "k18"], "", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } attributeList.add(new TvfAttribute("agg_tbl_basic_tvf", "c1 as k00,c2 as k01,c3 as k02,c4 as k03,c5 as k04,c6 as k05,c7 as k06,c8 as k07,c9 as k08,c10 as k09,c11 as k10,c12 as k11,c13 as k12,c14 as k13,c15 as k14,c16 as k15,c17 as k16,c18 as k17,c19 as k18, to_bitmap(c6) as k19, HLL_HASH(c6) as k20, TO_QUANTILE_STATE(c5, 1.0) as k21, to_bitmap(c6) as kd19, HLL_HASH(c6) as kd20, TO_QUANTILE_STATE(c5, 1.0) as kd21", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18,k19,k20,k21,kd19,kd20,kd21" ,"", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) for(String table : arrayTables) { attributeList.add(new TvfAttribute(table, ["k00", "k01", "k02", "k03", "k04", "k05", "k06", "k07", "k08", "k09", "k10", "k11", "k12", "k13", "k14", "k15", "k16", "k17"], "", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_array_data.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } for(String table : basicTables) { @@ -140,20 +146,23 @@ suite("test_s3_tvf", "load_p0") { attributeList.add(new TvfAttribute(table, ["k00", "k01", "k02", "k03", "k04", "k05", "k06", "k07", "k08", "k09", "k10", "k11", "k12", "k13", "k14", "k15", "k16", "k17", "k18"], "", "", true) .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data_with_errors.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } attributeList.add(new TvfAttribute("agg_tbl_basic_tvf", "c1 as k00,c2 as k01,c3 as k02,c4 as k03,c5 as k04,c6 as k05,c7 as k06,c8 as k07,c9 as k08,c10 as k09,c11 as k10,c12 as k11,c13 as k12,c14 as k13,c15 as k14,c16 as k15,c17 as k16,c18 as k17,c19 as k18, to_bitmap(c6) as k19, HLL_HASH(c6) as k20, TO_QUANTILE_STATE(c5, 1.0) as k21, to_bitmap(c6) as kd19, HLL_HASH(c6) as kd20, TO_QUANTILE_STATE(c5, 1.0) as kd21", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18,k19,k20,k21,kd19,kd20,kd21" ,"", "", true) .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data_with_errors.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) for(String table : arrayTables) { attributeList.add(new TvfAttribute(table, ["k00", "k01", "k02", "k03", "k04", "k05", "k06", "k07", "k08", "k09", "k10", "k11", "k12", "k13", "k14", "k15", "k16", "k17", "kd01", "kd02", "kd03", "kd04", "kd05", "kd06", "kd07", "kd08", "kd09", "kd10", "kd11", "kd12", "kd13", "kd14", "kd15", "kd16"], "", "", true) .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_array_data_with_errors.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } /* skip lines */ @@ -162,7 +171,8 @@ suite("test_s3_tvf", "load_p0") { .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data_with_errors.csv") .addProperty("format", "csv") .addProperty("column_separator", "|") - .addProperty("skip_lines", "10")) + .addProperty("skip_lines", "10") + .addProperty("force_parsing_by_standard_uri", "true")) } attributeList.add(new TvfAttribute("agg_tbl_basic_tvf", "c1 as k00,c2 as k01,c3 as k02,c4 as k03,c5 as k04,c6 as k05,c7 as k06,c8 as k07,c9 as k08,c10 as k09,c11 as k10,c12 as k11,c13 as k12,c14 as k13,c15 as k14,c16 as k15,c17 as k16,c18 as k17,c19 as k18, to_bitmap(c6) as k19, HLL_HASH(c6) as k20, TO_QUANTILE_STATE(c5, 1.0) as k21, to_bitmap(c6) as kd19, HLL_HASH(c6) as kd20, TO_QUANTILE_STATE(c5, 1.0) as kd21", @@ -170,14 +180,16 @@ suite("test_s3_tvf", "load_p0") { .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data_with_errors.csv") .addProperty("format", "csv") .addProperty("column_separator", "|") - .addProperty("skip_lines", "10")) + .addProperty("skip_lines", "10") + .addProperty("force_parsing_by_standard_uri", "true")) for(String table : arrayTables) { attributeList.add(new TvfAttribute(table, ["k00", "k01", "k02", "k03", "k04", "k05", "k06", "k07", "k08", "k09", "k10", "k11", "k12", "k13", "k14", "k15", "k16", "k17"], "", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_array_data_with_errors.csv") .addProperty("format", "csv") .addProperty("column_separator", "|") - .addProperty("skip_lines", "10")) + .addProperty("skip_lines", "10") + .addProperty("force_parsing_by_standard_uri", "true")) } /* compress type */ @@ -251,67 +263,77 @@ suite("test_s3_tvf", "load_p0") { attributeList.add(new TvfAttribute(table, ["k00", "k01", "k02", "k03", "k04", "k05", "k06", "k07", "k08", "k09", "k10", "k11", "k12", "k13", "k14", "k15", "k16", "k17", "k18"], "WHERE c1 > 50", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } attributeList.add(new TvfAttribute("agg_tbl_basic_tvf", "c1 as k00,c2 as k01,c3 as k02,c4 as k03,c5 as k04,c6 as k05,c7 as k06,c8 as k07,c9 as k08,c10 as k09,c11 as k10,c12 as k11,c13 as k12,c14 as k13,c15 as k14,c16 as k15,c17 as k16,c18 as k17,c19 as k18, to_bitmap(c6) as k19, HLL_HASH(c6) as k20, TO_QUANTILE_STATE(c5, 1.0) as k21, to_bitmap(c6) as kd19, HLL_HASH(c6) as kd20, TO_QUANTILE_STATE(c5, 1.0) as kd21", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18,k19,k20,k21,kd19,kd20,kd21" ,"WHERE c1 > 50", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) for(String table : arrayTables) { attributeList.add(new TvfAttribute(table, ["k00", "k01", "k02", "k03", "k04", "k05", "k06", "k07", "k08", "k09", "k10", "k11", "k12", "k13", "k14", "k15", "k16", "k17"], "WHERE c1 > 50", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_array_data.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } for(String table : uniqTable) { attributeList.add(new TvfAttribute(table, ["k00", "k01", "k02", "k03", "k04", "k05", "k06", "k07", "k08", "k09", "k10", "k11", "k12", "k13", "k14", "k15", "k16", "k17", "k18"], "", "ORDER BY c1") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data.csv") .addProperty("format", "csv") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } for(String table : basicTables) { attributeList.add(new TvfAttribute(table, "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18","k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18", "", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data.parq") .addProperty("format", "parquet") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } attributeList.add(new TvfAttribute("agg_tbl_basic_tvf", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18, to_bitmap(k05) as k19, HLL_HASH(k05) as k20, TO_QUANTILE_STATE(k04, 1.0) as k21, to_bitmap(k05) as kd19, HLL_HASH(k05) as kd20, TO_QUANTILE_STATE(k04, 1.0) as kd21", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18,k19,k20,k21,kd19,kd20,kd21" ,"", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data.parq") .addProperty("format", "parquet") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) for(String table : arrayTables) { attributeList.add(new TvfAttribute(table, "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17", "", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_array_data.parq") .addProperty("format", "parquet") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } for(String table : basicTables) { attributeList.add(new TvfAttribute(table, "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18","k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18", "", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data.orc") .addProperty("format", "orc") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } attributeList.add(new TvfAttribute("agg_tbl_basic_tvf", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18, to_bitmap(k05) as k19, HLL_HASH(k05) as k20, TO_QUANTILE_STATE(k04, 1.0) as k21, to_bitmap(k05) as kd19, HLL_HASH(k05) as kd20, TO_QUANTILE_STATE(k04, 1.0) as kd21", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18,k19,k20,k21,kd19,kd20,kd21" ,"", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_data.orc") .addProperty("format", "orc") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) for(String table : arrayTables) { attributeList.add(new TvfAttribute(table, "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17", "", "") .addProperty("uri", "s3://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/data/basic_array_data.orc") .addProperty("format", "orc") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } for(String table : basicTables) { @@ -320,7 +342,8 @@ suite("test_s3_tvf", "load_p0") { .addProperty("format", "json") .addProperty("read_json_by_line", "false") .addProperty("strip_outer_array", "true") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } attributeList.add(new TvfAttribute("agg_tbl_basic_tvf", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18, to_bitmap(k05) as k19, HLL_HASH(k05) as k20, TO_QUANTILE_STATE(k04, 1.0) as k21, to_bitmap(k05) as kd19, HLL_HASH(k05) as kd20, TO_QUANTILE_STATE(k04, 1.0) as kd21", @@ -329,7 +352,8 @@ suite("test_s3_tvf", "load_p0") { .addProperty("format", "json") .addProperty("read_json_by_line", "false") .addProperty("strip_outer_array", "true") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) for(String table : arrayTables) { attributeList.add(new TvfAttribute(table, "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17", "", "") @@ -337,7 +361,8 @@ suite("test_s3_tvf", "load_p0") { .addProperty("format", "json") .addProperty("read_json_by_line", "false") .addProperty("strip_outer_array", "true") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } for(String table : basicTables) { @@ -346,7 +371,8 @@ suite("test_s3_tvf", "load_p0") { .addProperty("format", "json") .addProperty("read_json_by_line", "true") .addProperty("strip_outer_array", "false") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } attributeList.add(new TvfAttribute("agg_tbl_basic_tvf", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18, to_bitmap(k05) as k19, HLL_HASH(k05) as k20, TO_QUANTILE_STATE(k04, 1.0) as k21, to_bitmap(k05) as kd19, HLL_HASH(k05) as kd20, TO_QUANTILE_STATE(k04, 1.0) as kd21", @@ -355,7 +381,8 @@ suite("test_s3_tvf", "load_p0") { .addProperty("format", "json") .addProperty("read_json_by_line", "true") .addProperty("strip_outer_array", "false") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) for(String table : arrayTables) { attributeList.add(new TvfAttribute(table, "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17", "k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17", "", "") @@ -363,7 +390,8 @@ suite("test_s3_tvf", "load_p0") { .addProperty("format", "json") .addProperty("read_json_by_line", "true") .addProperty("strip_outer_array", "false") - .addProperty("column_separator", "|")) + .addProperty("column_separator", "|") + .addProperty("force_parsing_by_standard_uri", "true")) } def ak = getS3AK() @@ -483,4 +511,4 @@ class TvfAttribute { properties.put(k, v) return this } -} \ No newline at end of file +}