From 4ac38ca67adce44ba2e8a574d052dcd6ea342a11 Mon Sep 17 00:00:00 2001 From: yagagagaga Date: Tue, 13 Jun 2023 08:57:01 +0800 Subject: [PATCH] [typo](docs) add a python example for stream load. (#20697) --- .../import/import-way/stream-load-manual.md | 16 ++++ .../import/import-way/stream-load-manual.md | 16 ++++ samples/stream_load/python/DorisStreamLoad.py | 86 +++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 samples/stream_load/python/DorisStreamLoad.py diff --git a/docs/en/docs/data-operate/import/import-way/stream-load-manual.md b/docs/en/docs/data-operate/import/import-way/stream-load-manual.md index 4fa8494eae..2842ed78de 100644 --- a/docs/en/docs/data-operate/import/import-way/stream-load-manual.md +++ b/docs/en/docs/data-operate/import/import-way/stream-load-manual.md @@ -382,6 +382,22 @@ Cluster situation: The concurrency of Stream load is not affected by cluster siz curl --location-trusted -u user:password -T /home/store_sales -H "label:abc" http://abc.com:8030/api/bj_sales/store_sales/_stream_load ``` +### Coding with StreamLoad + +You can initiate HTTP requests for Stream Load using any language. Before initiating HTTP requests, you need to set several necessary headers: + +```http +Content-Type: text/plain; charset=UTF-8 +Expect: 100-continue +Authorization: Basic <Base64 encoded username and password> +``` + +`<Base64 encoded username and password>`: the Base64 encoding of the string formed by joining Doris's `username`, `:` and `password`. + +Also note that if you send the HTTP request directly to an FE, Doris redirects it to a BE, and some frameworks remove the `Authorization` HTTP header when following that redirect; in that case you must re-attach the header manually. + +Doris provides StreamLoad examples in three languages: [Java](https://github.com/apache/doris/tree/master/samples/stream_load/java), [Go](https://github.com/apache/doris/tree/master/samples/stream_load/go), and [Python](https://github.com/apache/doris/tree/master/samples/stream_load/python) for reference. 
+ ## Common Questions * Label Already Exists diff --git a/docs/zh-CN/docs/data-operate/import/import-way/stream-load-manual.md b/docs/zh-CN/docs/data-operate/import/import-way/stream-load-manual.md index 5fb3d95e63..a7e1c6463d 100644 --- a/docs/zh-CN/docs/data-operate/import/import-way/stream-load-manual.md +++ b/docs/zh-CN/docs/data-operate/import/import-way/stream-load-manual.md @@ -397,6 +397,22 @@ timeout = 1000s 等于 10G / 10M/s curl --location-trusted -u user:password -T /home/store_sales -H "label:abc" http://abc.com:8030/api/bj_sales/store_sales/_stream_load ``` +### 使用代码调用 StreamLoad + +你可以使用任意代码发起 http 请求进行 Stream Load,在发起 http 请求前,需要设置几个必要的 Header: + +```http +Content-Type: text/plain; charset=UTF-8 +Expect: 100-continue +Authorization: Basic <Base64编码后的用户名密码> +``` + +其中,`<Base64编码后的用户名密码>`是指 Doris 的`username`+`:`+`password`拼接成的字符串进行 base64 编码后得到的值。 + +另外,需要注意的是,如果你直接对 FE 发起 http 请求,由于 Doris 会重定向到 BE,在这个过程中,某些框架会把`Authorization`这个 http Header 移除掉,这个时候需要你进行手动处理。 + +Doris 提供了 [Java](https://github.com/apache/doris/tree/master/samples/stream_load/java)、[Go](https://github.com/apache/doris/tree/master/samples/stream_load/go)、[Python](https://github.com/apache/doris/tree/master/samples/stream_load/python) 三种语言的 StreamLoad Example 供参考。 + ## 常见问题 - Label Already Exists diff --git a/samples/stream_load/python/DorisStreamLoad.py b/samples/stream_load/python/DorisStreamLoad.py new file mode 100644 index 0000000000..cb14150443 --- /dev/null +++ b/samples/stream_load/python/DorisStreamLoad.py @@ -0,0 +1,86 @@ +# coding=utf-8 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import requests +from requests.auth import HTTPBasicAuth +import base64 + +# This script is a python demo for doris stream load +# +# How to use: +# 1. create a table in doris with any mysql client +# CREATE TABLE `db0`.`t_user` ( +# `id` int, +# `name` string +# ) +# DUPLICATE KEY(`id`) +# DISTRIBUTED BY HASH(`id`) BUCKETS 1 +# PROPERTIES ( +# "replication_num" = "1" +# ); +# +# 2. change the Doris cluster, db, user config in this class +# +# 3. run this script, you should see the following output: +# +# 200 OK +# { +# "TxnId": 14017, +# "Label": "2486da70-94bb-47cc-a810-70791add2b8c", +# "TwoPhaseCommit": "false", +# "Status": "Success", +# "Message": "OK", +# "NumberTotalRows": 2, +# "NumberLoadedRows": 2, +# "NumberFilteredRows": 0, +# "NumberUnselectedRows": 0, +# "LoadBytes": 13, +# "LoadTimeMs": 54, +# "BeginTxnTimeMs": 1, +# "StreamLoadPutTimeMs": 12, +# "ReadDataTimeMs": 0, +# "WriteDataTimeMs": 11, +# "CommitAndPublishTimeMs": 28 +# } +if __name__ == '__main__': + database, table = 'db0', 't_user' + username, password = 'root', '' + url = 'http://127.0.0.1:8030/api/%s/%s/_stream_load' % (database, table) + headers = { + 'Content-Type': 'text/plain; charset=UTF-8', + # 'label': 'your_custom_label', + 'format': 'csv', + "column_separator": ',', + 'Expect': '100-continue', + # 'Authorization': 'Basic ' + base64.b64encode((username + ':' + password).encode('utf-8')).decode('ascii') + } + auth = HTTPBasicAuth(username, password) + session = requests.sessions.Session() + session.should_strip_auth = lambda old_url, new_url: False # Don't strip auth + 
+ data='1,Tom\n2,Jelly' + + resp = session.request( + 'PUT', url=url, + data=data, # open('/path/to/your/data.csv', 'rb'), + headers=headers, auth=auth + ) + + print(resp.status_code, resp.reason) + print(resp.text)