175 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			175 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/**
 | 
						|
 * Copyright (c) 2021 OceanBase
 | 
						|
 * OceanBase CE is licensed under Mulan PubL v2.
 | 
						|
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 | 
						|
 * You may obtain a copy of Mulan PubL v2 at:
 | 
						|
 *          http://license.coscl.org.cn/MulanPubL-2.0
 | 
						|
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
						|
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
						|
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
						|
 * See the Mulan PubL v2 for more details.
 | 
						|
 */
 | 
						|
 | 
						|
#define USING_LOG_PREFIX STORAGE
 | 
						|
 | 
						|
#include "csv_data_reader.h"
 | 
						|
 | 
						|
namespace oceanbase
 | 
						|
{
 | 
						|
using namespace common;
 | 
						|
 | 
						|
namespace blocksstable
 | 
						|
{
 | 
						|
 | 
						|
CSVDataReader::CSVDataReader()
 | 
						|
{
 | 
						|
}
 | 
						|
 | 
						|
CSVDataReader::~CSVDataReader()
 | 
						|
{
 | 
						|
  ifs_.close();
 | 
						|
  free(buf_);
 | 
						|
}
 | 
						|
 | 
						|
int CSVDataReader::init(const char *file)
 | 
						|
{
 | 
						|
  int ret = OB_SUCCESS;
 | 
						|
  ifs_.open(file);
 | 
						|
  if (ifs_.fail()) {
 | 
						|
    ret = OB_ERROR;
 | 
						|
    LOG_WARN("failed to open csv data file", K(ret));
 | 
						|
  } else {
 | 
						|
    buf_ = new char [MAX_STR_LEN];
 | 
						|
    buf_size_ = MAX_STR_LEN;
 | 
						|
  }
 | 
						|
  return ret;
 | 
						|
}
 | 
						|
 | 
						|
int CSVDataReader::next_data(std::vector<ObString> &data)
 | 
						|
{
 | 
						|
  int ret = OB_SUCCESS;
 | 
						|
  while (std::getline(ifs_, cur_line_)) {
 | 
						|
    if (!cur_line_.empty()) {
 | 
						|
      if (cur_line_.length() > buf_size_) {
 | 
						|
        free(buf_);
 | 
						|
        // next power of 2
 | 
						|
        buf_size_ = (cur_line_.length() + 1) * 2;
 | 
						|
        if (0 != (buf_size_ & (buf_size_ - 1))) {
 | 
						|
          while (0 != (buf_size_ & (buf_size_ - 1))) {
 | 
						|
            buf_size_ = buf_size_ & (buf_size_ - 1);
 | 
						|
          }
 | 
						|
        }
 | 
						|
        if (buf_size_ < cur_line_.length()) {
 | 
						|
          ret = OB_ERR_UNEXPECTED;
 | 
						|
          LOG_WARN("unexpected size", K(ret), K(buf_size_), K(cur_line_.length()));
 | 
						|
        } else {
 | 
						|
          buf_ = new char [buf_size_];
 | 
						|
        }
 | 
						|
      }
 | 
						|
      break;
 | 
						|
    }
 | 
						|
  }
 | 
						|
  if (cur_line_.empty()) {
 | 
						|
    ret = OB_ITER_END;
 | 
						|
  } else if (OB_FAIL(parse_line())) {
 | 
						|
    LOG_WARN("failed to parse csv line", K(ret));
 | 
						|
  } else {
 | 
						|
    data.clear();
 | 
						|
    data.reserve(datas_.size());
 | 
						|
    for (int64_t i = 0; i < datas_.size(); i++) {
 | 
						|
      data.push_back(datas_.at(i));
 | 
						|
    }
 | 
						|
  }
 | 
						|
  return ret;
 | 
						|
}
 | 
						|
 | 
						|
int CSVDataReader::parse_line()
 | 
						|
{
 | 
						|
  int ret = OB_SUCCESS;
 | 
						|
  if (cur_line_.empty()) {
 | 
						|
    ret = OB_INVALID_ARGUMENT;
 | 
						|
    LOG_WARN("cur_line_ is empty", K(ret));
 | 
						|
  } else {
 | 
						|
    datas_.clear();
 | 
						|
    const char *p = cur_line_.c_str();
 | 
						|
    int64_t pos = 0;
 | 
						|
    int64_t begin = -1;
 | 
						|
    bool quote = false;
 | 
						|
    while (*p) {
 | 
						|
      if (quote) {
 | 
						|
        switch (*p) {
 | 
						|
          // according to ObHexEscapeSqlStr::to_string fun
 | 
						|
          case '\\':
 | 
						|
            p++;
 | 
						|
            if (!*p) {
 | 
						|
              ret = OB_ERROR;
 | 
						|
              LOG_WARN("unterminated string", K(ret));
 | 
						|
              return ret;
 | 
						|
            } else {
 | 
						|
              switch (*p) {
 | 
						|
                case '\\':
 | 
						|
                case '\'':
 | 
						|
                case '\"':
 | 
						|
                  buf_[pos++] = *p;
 | 
						|
                  break;
 | 
						|
                case 'n':
 | 
						|
                  buf_[pos++] = '\n';
 | 
						|
                  break;
 | 
						|
                case '0':
 | 
						|
                  buf_[pos++] = '\0';
 | 
						|
                  break;
 | 
						|
                case 'r':
 | 
						|
                  buf_[pos++] = '\r';
 | 
						|
                  break;
 | 
						|
                case 't':
 | 
						|
                  buf_[pos++] = '\t';
 | 
						|
                  break;
 | 
						|
                default:
 | 
						|
                  ret = OB_ERROR;
 | 
						|
                  LOG_WARN("unexpected char", K(ret), K(*p));
 | 
						|
                  break;
 | 
						|
              }
 | 
						|
            }
 | 
						|
            p++;
 | 
						|
            break;
 | 
						|
          case '\'':
 | 
						|
            buf_[pos++] = *(p++);
 | 
						|
            if (*p == ',') {
 | 
						|
              quote = false;
 | 
						|
              datas_.push_back(ObString(pos - begin, &buf_[begin]));
 | 
						|
              begin = -1;
 | 
						|
            }
 | 
						|
            break;
 | 
						|
          default:
 | 
						|
            buf_[pos++] = *(p++);
 | 
						|
        }
 | 
						|
      } else {
 | 
						|
        switch (*p) {
 | 
						|
          case ',':
 | 
						|
            if (-1 != begin) {
 | 
						|
              datas_.push_back(ObString(pos - begin, &buf_[begin]));
 | 
						|
              begin = -1;
 | 
						|
            }
 | 
						|
            p++;
 | 
						|
            break;
 | 
						|
          case '\'':
 | 
						|
            quote = true;
 | 
						|
          default:
 | 
						|
            if (-1 == begin) {
 | 
						|
              begin = pos;
 | 
						|
            }
 | 
						|
            buf_[pos++] = *(p++);
 | 
						|
        }
 | 
						|
      }
 | 
						|
    }
 | 
						|
    if (-1 != begin) {
 | 
						|
      datas_.push_back(ObString(pos - begin, &buf_[begin]));
 | 
						|
      begin = -1;
 | 
						|
    }
 | 
						|
  }
 | 
						|
  return ret;
 | 
						|
}
 | 
						|
 | 
						|
}
 | 
						|
}
 |