[fix](datax)doris writer url decoder fix #22401

When the user imports data, there are some special characters in the data, which will cause the import to fail
The following error message appears:

2023-07-28 15:15:28.960  INFO 21756 --- [-interval-flush] c.a.d.p.w.d.DorisWriterManager           : Doris interval Sinking triggered: label[datax_doris_writer_7aa415e6-5a9c-4070-a699-70b4a627ae64].
2023-07-28 15:15:29.015  INFO 21756 --- [       Thread-3] c.a.d.p.w.d.DorisStreamLoadObserver      : Start to join batch data: rows[95968] bytes[3815834] label[datax_doris_writer_7aa415e6-5a9c-4070-a699-70b4a627ae64].
2023-07-28 15:15:29.038  INFO 21756 --- [       Thread-3] c.a.d.p.w.d.DorisStreamLoadObserver      : Executing stream load to: 'http://10.38.60.218:8030/api/ods_prod/ods_pexweb_online_product/_stream_load', size: '3911802'
2023-07-28 15:15:31.559  WARN 21756 --- [       Thread-3] c.a.d.p.w.d.DorisStreamLoadObserver      : Request failed with code:500
2023-07-28 15:15:31.561  INFO 21756 --- [       Thread-3] c.a.d.p.w.d.DorisStreamLoadObserver      : StreamLoad response :null
2023-07-28 15:15:31.564  WARN 21756 --- [       Thread-3] c.a.d.p.w.d.DorisWriterManager           : Failed to flush batch data to Doris, retry times = 0

java.io.IOException: Unable to flush data to Doris: unknown result status.
	at com.alibaba.datax.plugin.writer.doriswriter.DorisStreamLoadObserver.streamLoad(DorisStreamLoadObserver.java:66) ~[doriswriter-0.0.1-SNAPSHOT.jar:na]
	at com.alibaba.datax.plugin.writer.doriswriter.DorisWriterManager.asyncFlush(DorisWriterManager.java:163) [doriswriter-0.0.1-SNAPSHOT.jar:na]
	at com.alibaba.datax.plugin.writer.doriswriter.DorisWriterManager.access$000(DorisWriterManager.java:19) [doriswriter-0.0.1-SNAPSHOT.jar:na]
	at com.alibaba.datax.plugin.writer.doriswriter.DorisWriterManager$1.run(DorisWriterManager.java:134) [doriswriter-0.0.1-SNAPSHOT.jar:na]
	at java.lang.Thread.run(Thread.java:748) [na:1.8.0_221]

在fe.log日志中发现下面的错误信息:

ava.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in escape (%) pattern - For input string: " l"
        at java.net.URLDecoder.decode(URLDecoder.java:194) ~[?:1.8.0_221]
        at org.springframework.http.converter.FormHttpMessageConverter.read(FormHttpMessageConverter.java:352) ~[spring-web-5.3.22.jar:5.3.22]
        at org.springframework.web.filter.FormContentFilter.parseIfNecessary(FormContentFilter.java:109) ~[spring-web-5.3.22.jar:5.3.22]
        at org.springframework.web.filter.FormContentFilter.doFilterInternal(FormContentFilter.java:88) ~[spring-web-5.3.22.jar:5.3.22]
        at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:117) ~[spring-web-5.3.22.jar:5.3.22]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622]
        at org.springframework.web.filter.CharacterEncodingFilter.doFilterInternal(CharacterEncodingFilter.java:201) ~[spring-web-5.3.22.jar:5.3.22]
        at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:117) ~[spring-web-5.3.22.jar:5.3.22]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622]
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:552) ~[jetty-servlet-9.4.48.v20220622.jar:9.4.48.v20220622]
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) ~[jetty-server-9.4.48.v20220622.jar:9.4.48.v20220622]
        at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:600) ~[jetty-security-9.4.48.v20220622.jar:9.4.48.v20220622]
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) ~[jetty-server-9.4.48.v20220622.jar:9.4.48.v20220622]
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandle
This commit is contained in:
jiafeng.zhang
2023-07-31 12:57:10 +08:00
committed by GitHub
parent b64f62647b
commit acc24df10a

View File

@ -64,6 +64,18 @@ public class DorisStreamLoadObserver {
this.options = options;
}
public String urlDecode(String outBuffer) {
String data = outBuffer;
try {
data = data.replaceAll("%(?![0-9a-fA-F]{2})", "%25");
data = data.replaceAll("\\+", "%2B");
data = URLDecoder.decode(data, "utf-8");
} catch (Exception e) {
e.printStackTrace();
}
return data;
}
public void streamLoad(WriterTuple data) throws Exception {
String host = getLoadHost();
if(host == null){
@ -77,6 +89,7 @@ public class DorisStreamLoadObserver {
.append("/_stream_load")
.toString();
LOG.info("Start to join batch data: rows[{}] bytes[{}] label[{}].", data.getRows().size(), data.getBytes(), data.getLabel());
loadUrl = urlDecode(loadUrl);
Map<String, Object> loadResult = put(loadUrl, data.getLabel(), addRows(data.getRows(), data.getBytes().intValue()));
LOG.info("StreamLoad response :{}",JSON.toJSONString(loadResult));
final String keyStatus = "Status";