175 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			175 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /**
 | |
|  * Copyright (c) 2021 OceanBase
 | |
|  * OceanBase CE is licensed under Mulan PubL v2.
 | |
|  * You can use this software according to the terms and conditions of the Mulan PubL v2.
 | |
|  * You may obtain a copy of Mulan PubL v2 at:
 | |
|  *          http://license.coscl.org.cn/MulanPubL-2.0
 | |
|  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | |
|  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | |
|  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | |
|  * See the Mulan PubL v2 for more details.
 | |
|  */
 | |
| 
 | |
| #define USING_LOG_PREFIX STORAGE
 | |
| 
 | |
| #include "csv_data_reader.h"
 | |
| 
 | |
| namespace oceanbase
 | |
| {
 | |
| using namespace common;
 | |
| 
 | |
| namespace blocksstable
 | |
| {
 | |
| 
 | |
| CSVDataReader::CSVDataReader()
 | |
| {
 | |
| }
 | |
| 
 | |
| CSVDataReader::~CSVDataReader()
 | |
| {
 | |
|   ifs_.close();
 | |
|   free(buf_);
 | |
| }
 | |
| 
 | |
| int CSVDataReader::init(const char *file)
 | |
| {
 | |
|   int ret = OB_SUCCESS;
 | |
|   ifs_.open(file);
 | |
|   if (ifs_.fail()) {
 | |
|     ret = OB_ERROR;
 | |
|     LOG_WARN("failed to open csv data file", K(ret));
 | |
|   } else {
 | |
|     buf_ = new char [MAX_STR_LEN];
 | |
|     buf_size_ = MAX_STR_LEN;
 | |
|   }
 | |
|   return ret;
 | |
| }
 | |
| 
 | |
| int CSVDataReader::next_data(std::vector<ObString> &data)
 | |
| {
 | |
|   int ret = OB_SUCCESS;
 | |
|   while (std::getline(ifs_, cur_line_)) {
 | |
|     if (!cur_line_.empty()) {
 | |
|       if (cur_line_.length() > buf_size_) {
 | |
|         free(buf_);
 | |
|         // next power of 2
 | |
|         buf_size_ = (cur_line_.length() + 1) * 2;
 | |
|         if (0 != (buf_size_ & (buf_size_ - 1))) {
 | |
|           while (0 != (buf_size_ & (buf_size_ - 1))) {
 | |
|             buf_size_ = buf_size_ & (buf_size_ - 1);
 | |
|           }
 | |
|         }
 | |
|         if (buf_size_ < cur_line_.length()) {
 | |
|           ret = OB_ERR_UNEXPECTED;
 | |
|           LOG_WARN("unexpected size", K(ret), K(buf_size_), K(cur_line_.length()));
 | |
|         } else {
 | |
|           buf_ = new char [buf_size_];
 | |
|         }
 | |
|       }
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
|   if (cur_line_.empty()) {
 | |
|     ret = OB_ITER_END;
 | |
|   } else if (OB_FAIL(parse_line())) {
 | |
|     LOG_WARN("failed to parse csv line", K(ret));
 | |
|   } else {
 | |
|     data.clear();
 | |
|     data.reserve(datas_.size());
 | |
|     for (int64_t i = 0; i < datas_.size(); i++) {
 | |
|       data.push_back(datas_.at(i));
 | |
|     }
 | |
|   }
 | |
|   return ret;
 | |
| }
 | |
| 
 | |
| int CSVDataReader::parse_line()
 | |
| {
 | |
|   int ret = OB_SUCCESS;
 | |
|   if (cur_line_.empty()) {
 | |
|     ret = OB_INVALID_ARGUMENT;
 | |
|     LOG_WARN("cur_line_ is empty", K(ret));
 | |
|   } else {
 | |
|     datas_.clear();
 | |
|     const char *p = cur_line_.c_str();
 | |
|     int64_t pos = 0;
 | |
|     int64_t begin = -1;
 | |
|     bool quote = false;
 | |
|     while (*p) {
 | |
|       if (quote) {
 | |
|         switch (*p) {
 | |
|           // according to ObHexEscapeSqlStr::to_string fun
 | |
|           case '\\':
 | |
|             p++;
 | |
|             if (!*p) {
 | |
|               ret = OB_ERROR;
 | |
|               LOG_WARN("unterminated string", K(ret));
 | |
|               return ret;
 | |
|             } else {
 | |
|               switch (*p) {
 | |
|                 case '\\':
 | |
|                 case '\'':
 | |
|                 case '\"':
 | |
|                   buf_[pos++] = *p;
 | |
|                   break;
 | |
|                 case 'n':
 | |
|                   buf_[pos++] = '\n';
 | |
|                   break;
 | |
|                 case '0':
 | |
|                   buf_[pos++] = '\0';
 | |
|                   break;
 | |
|                 case 'r':
 | |
|                   buf_[pos++] = '\r';
 | |
|                   break;
 | |
|                 case 't':
 | |
|                   buf_[pos++] = '\t';
 | |
|                   break;
 | |
|                 default:
 | |
|                   ret = OB_ERROR;
 | |
|                   LOG_WARN("unexpected char", K(ret), K(*p));
 | |
|                   break;
 | |
|               }
 | |
|             }
 | |
|             p++;
 | |
|             break;
 | |
|           case '\'':
 | |
|             buf_[pos++] = *(p++);
 | |
|             if (*p == ',') {
 | |
|               quote = false;
 | |
|               datas_.push_back(ObString(pos - begin, &buf_[begin]));
 | |
|               begin = -1;
 | |
|             }
 | |
|             break;
 | |
|           default:
 | |
|             buf_[pos++] = *(p++);
 | |
|         }
 | |
|       } else {
 | |
|         switch (*p) {
 | |
|           case ',':
 | |
|             if (-1 != begin) {
 | |
|               datas_.push_back(ObString(pos - begin, &buf_[begin]));
 | |
|               begin = -1;
 | |
|             }
 | |
|             p++;
 | |
|             break;
 | |
|           case '\'':
 | |
|             quote = true;
 | |
|           default:
 | |
|             if (-1 == begin) {
 | |
|               begin = pos;
 | |
|             }
 | |
|             buf_[pos++] = *(p++);
 | |
|         }
 | |
|       }
 | |
|     }
 | |
|     if (-1 != begin) {
 | |
|       datas_.push_back(ObString(pos - begin, &buf_[begin]));
 | |
|       begin = -1;
 | |
|     }
 | |
|   }
 | |
|   return ret;
 | |
| }
 | |
| 
 | |
| }
 | |
| }
 | 
