[Feat](OSS)Decouple COS and OBS dependencies with optional inclusion mechanism (#39831)

…

This change improves the flexibility of the project by decoupling the direct
dependencies on the hadoop-cos and hadoop-huaweicloud libraries. It allows
users to control whether the COS and OBS dependencies are included in the
final build, enabling a more customizable setup.

## Proposed changes

Issue Number: close #xxx

<!--Describe your changes.-->
This commit is contained in:
Calvin Kirs
2024-09-09 16:50:12 +08:00
committed by GitHub
parent 44a7efff4f
commit 2235c1abd3
5 changed files with 91 additions and 13 deletions

View File

@ -410,6 +410,7 @@ under the License.
<groupId>com.huaweicloud</groupId>
<artifactId>hadoop-huaweicloud</artifactId>
<version>${huaweiobs.version}</version>
<scope>${obs.dependency.scope}</scope>
<exclusions>
<exclusion>
<artifactId>jackson-databind</artifactId>
@ -425,6 +426,7 @@ under the License.
<groupId>com.qcloud.cos</groupId>
<artifactId>hadoop-cos</artifactId>
<version>${tencentcos.version}</version>
<scope>${cos.dependency.scope}</scope>
<exclusions>
<exclusion>
<groupId>org.json</groupId>
@ -666,6 +668,7 @@ under the License.
<artifactId>gcs-connector</artifactId>
<version>hadoop2-2.2.8</version>
<classifier>shaded</classifier>
<scope>${gcs.dependency.scope}</scope>
<exclusions>
<exclusion>
<groupId>*</groupId>

View File

@ -38,11 +38,7 @@ import com.aliyun.datalake.metastore.common.DataLakeConfig;
import com.amazonaws.glue.catalog.util.AWSGlueConfig;
import com.google.common.base.Strings;
import com.google.common.collect.Maps;
import org.apache.hadoop.fs.CosFileSystem;
import org.apache.hadoop.fs.CosNConfigKeys;
import org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem;
import org.apache.hadoop.fs.obs.OBSConstants;
import org.apache.hadoop.fs.obs.OBSFileSystem;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider;
@ -164,12 +160,12 @@ public class PropertyConverter {
private static Map<String, String> convertToOBSProperties(Map<String, String> props,
CloudCredential credential) {
Map<String, String> obsProperties = Maps.newHashMap();
obsProperties.put(OBSConstants.ENDPOINT, props.get(ObsProperties.ENDPOINT));
obsProperties.put(ObsProperties.HadoopFsObsConstants.ENDPOINT, props.get(ObsProperties.ENDPOINT));
obsProperties.put(ObsProperties.FS.IMPL_DISABLE_CACHE, "true");
obsProperties.put("fs.obs.impl", getHadoopFSImplByScheme("obs"));
if (credential.isWhole()) {
obsProperties.put(OBSConstants.ACCESS_KEY, credential.getAccessKey());
obsProperties.put(OBSConstants.SECRET_KEY, credential.getSecretKey());
obsProperties.put(ObsProperties.HadoopFsObsConstants.ACCESS_KEY, credential.getAccessKey());
obsProperties.put(ObsProperties.HadoopFsObsConstants.SECRET_KEY, credential.getSecretKey());
}
if (credential.isTemporary()) {
obsProperties.put(ObsProperties.FS.SESSION_TOKEN, credential.getSessionToken());
@ -184,11 +180,11 @@ public class PropertyConverter {
public static String getHadoopFSImplByScheme(String fsScheme) {
if (fsScheme.equalsIgnoreCase("obs")) {
return OBSFileSystem.class.getName();
return ObsProperties.HadoopFsObsConstants.HADOOP_FS_OBS_CLASS_NAME;
} else if (fsScheme.equalsIgnoreCase("oss")) {
return AliyunOSSFileSystem.class.getName();
} else if (fsScheme.equalsIgnoreCase("cosn") || fsScheme.equalsIgnoreCase("lakefs")) {
return CosFileSystem.class.getName();
return CosProperties.HadoopFsCosConstants.HADOOP_FS_COS_CLASS_NAME;
} else {
return S3AFileSystem.class.getName();
}
@ -350,13 +346,16 @@ public class PropertyConverter {
private static Map<String, String> convertToCOSProperties(Map<String, String> props, CloudCredential credential) {
Map<String, String> cosProperties = Maps.newHashMap();
cosProperties.put(CosNConfigKeys.COSN_ENDPOINT_SUFFIX_KEY, props.get(CosProperties.ENDPOINT));
cosProperties.put(CosProperties.HadoopFsCosConstants.COSN_ENDPOINT_SUFFIX_KEY,
props.get(CosProperties.ENDPOINT));
cosProperties.put("fs.cosn.impl.disable.cache", "true");
cosProperties.put("fs.cosn.impl", getHadoopFSImplByScheme("cosn"));
cosProperties.put("fs.lakefs.impl", getHadoopFSImplByScheme("lakefs"));
if (credential.isWhole()) {
cosProperties.put(CosNConfigKeys.COSN_USERINFO_SECRET_ID_KEY, credential.getAccessKey());
cosProperties.put(CosNConfigKeys.COSN_USERINFO_SECRET_KEY_KEY, credential.getSecretKey());
cosProperties.put(CosProperties.HadoopFsCosConstants.COSN_USERINFO_SECRET_ID_KEY,
credential.getAccessKey());
cosProperties.put(CosProperties.HadoopFsCosConstants.COSN_USERINFO_SECRET_KEY_KEY,
credential.getSecretKey());
}
// session token is unsupported
for (Map.Entry<String, String> entry : props.entrySet()) {

View File

@ -34,8 +34,44 @@ public class CosProperties extends BaseProperties {
public static final String SESSION_TOKEN = "cos.session_token";
public static final List<String> REQUIRED_FIELDS = Arrays.asList(ENDPOINT, ACCESS_KEY, SECRET_KEY);
/**
 * Builds a {@code CloudCredential} from the given connection properties.
 * Delegates to {@code getCloudCredential} (presumably inherited from
 * {@code BaseProperties} — confirm against the superclass), passing the
 * COS-specific access-key, secret-key and session-token property names.
 *
 * @param props raw connector properties keyed by the {@code cos.*} names
 * @return the extracted credential; completeness depends on which keys are present
 */
public static CloudCredential getCredential(Map<String, String> props) {
    return getCloudCredential(props, ACCESS_KEY, SECRET_KEY, SESSION_TOKEN);
}
/**
 * Constants related to COS (Tencent Cloud Object Storage) Hadoop integration.
 * <p>
 * The constants in the {@code HadoopFsCosConstants} inner class are copied from
 * {@code org.apache.hadoop.fs.CosNConfigKeys}. This approach is intentionally taken to
 * avoid a compile-time dependency on the {@code hadoop-cos} library. By doing so, we
 * ensure that this project remains decoupled from {@code hadoop-cos}, allowing it to be
 * compiled and built independently.
 * <p>
 * Whether the COS-related dependencies are included in the packaging process is
 * controlled by a build parameter. By default they are not included. If the package
 * does not contain these dependencies but Hadoop COS functionality is required,
 * users will need to manually copy the relevant jars into the {@code fe/lib} directory.
 * <p>
 * However, since this is not an uberjar and the required dependencies are not bundled
 * together, manually copying dependencies is not recommended due to potential
 * issues such as version conflicts or missing transitive dependencies.
 * <p>
 * Users are encouraged to configure the build process to include the necessary
 * dependencies when Hadoop COS support is required, ensuring a smoother
 * and more reliable deployment.
 * <p>
 * Additionally, copying these constants instead of depending on {@code hadoop-cos}
 * adds maintenance overhead: changes to {@code CosNConfigKeys} in future versions
 * of {@code hadoop-cos} will not be reflected here automatically and must be
 * tracked and updated manually to ensure compatibility.
 */
// NOTE(review): constants-only holder — consider adding a private constructor
// to prevent instantiation.
public static class HadoopFsCosConstants {
    // Fully-qualified CosN FileSystem class, referenced by name (not by
    // CosFileSystem.class) so hadoop-cos stays off the compile classpath.
    public static final String HADOOP_FS_COS_CLASS_NAME = "org.apache.hadoop.fs.CosFileSystem";
    // Mirror of CosNConfigKeys.COSN_ENDPOINT_SUFFIX_KEY.
    public static final String COSN_ENDPOINT_SUFFIX_KEY = "fs.cosn.bucket.endpoint_suffix";
    // Mirror of CosNConfigKeys.COSN_USERINFO_SECRET_ID_KEY.
    public static final String COSN_USERINFO_SECRET_ID_KEY = "fs.cosn.userinfo.secretId";
    // Mirror of CosNConfigKeys.COSN_USERINFO_SECRET_KEY_KEY.
    public static final String COSN_USERINFO_SECRET_KEY_KEY = "fs.cosn.userinfo.secretKey";
}
}

View File

@ -43,4 +43,41 @@ public class ObsProperties extends BaseProperties {
/**
 * Builds a {@code CloudCredential} from the given connection properties.
 * Delegates to {@code getCloudCredential} (presumably inherited from
 * {@code BaseProperties} — confirm against the superclass), passing the
 * OBS-specific access-key, secret-key and session-token property names.
 *
 * @param props raw connector properties keyed by the {@code obs.*} names
 * @return the extracted credential; completeness depends on which keys are present
 */
public static CloudCredential getCredential(Map<String, String> props) {
    return getCloudCredential(props, ACCESS_KEY, SECRET_KEY, SESSION_TOKEN);
}
/**
 * Constants related to OBS (Huawei Cloud Object Storage Service) Hadoop integration.
 * <p>
 * The constants in the {@code HadoopFsObsConstants} inner class are copied from
 * {@code org.apache.hadoop.fs.obs.OBSConstants}. This approach is deliberately taken to
 * avoid a compile-time dependency on the {@code hadoop-huaweicloud} library. By doing so,
 * we ensure that this project remains decoupled from {@code hadoop-huaweicloud}, allowing
 * it to be compiled and built independently.
 * <p>
 * As with the COS properties (see {@code CosProperties}), whether the OBS-related
 * dependencies are included in the packaging process is controlled by a build
 * parameter. By default they are not included. If the package does not contain
 * these dependencies but Hadoop OBS functionality is required, users will need
 * to manually copy the relevant jars into the {@code fe/lib} directory.
 * <p>
 * However, manually copying dependencies is not recommended since this is not an
 * uberjar, and there could be potential issues such as version conflicts or missing
 * transitive dependencies.
 * <p>
 * Users are encouraged to configure the build process to include the necessary
 * dependencies when Hadoop OBS support is required, ensuring a smoother
 * and more reliable deployment.
 * <p>
 * Additionally, copying these constants instead of depending on
 * {@code hadoop-huaweicloud} adds maintenance overhead: changes to
 * {@code OBSConstants} in future versions of {@code hadoop-huaweicloud} will not be
 * reflected here automatically and must be tracked and updated manually to ensure
 * compatibility.
 */
// NOTE(review): constants-only holder — consider adding a private constructor
// to prevent instantiation.
public static class HadoopFsObsConstants {
    // Fully-qualified OBS FileSystem class, referenced by name (not by
    // OBSFileSystem.class) so hadoop-huaweicloud stays off the compile classpath.
    public static final String HADOOP_FS_OBS_CLASS_NAME = "org.apache.hadoop.fs.obs.OBSFileSystem";
    // Mirror of OBSConstants.ENDPOINT.
    public static final String ENDPOINT = "fs.obs.endpoint";
    // Mirror of OBSConstants.ACCESS_KEY.
    public static final String ACCESS_KEY = "fs.obs.access.key";
    // Mirror of OBSConstants.SECRET_KEY.
    public static final String SECRET_KEY = "fs.obs.secret.key";
}
}

View File

@ -227,6 +227,9 @@ under the License.
<!--suppress UnresolvedMavenProperty -->
<doris.home>${fe.dir}/../</doris.home>
<revision>1.2-SNAPSHOT</revision>
<obs.dependency.scope>compile</obs.dependency.scope>
<cos.dependency.scope>compile</cos.dependency.scope>
<gcs.dependency.scope>compile</gcs.dependency.scope>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<doris.hive.catalog.shade.version>2.1.1</doris.hive.catalog.shade.version>
<maven.compiler.source>1.8</maven.compiler.source>