Files
doris/be/src/exec/csv_scanner.cpp
2017-08-11 17:51:21 +08:00

95 lines
3.0 KiB
C++

// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "exec/csv_scanner.h"
#include <boost/algorithm/string.hpp>
namespace palo {
CsvScanner::CsvScanner(const std::vector<std::string>& csv_file_paths) :
_is_open(false),
_file_paths(csv_file_paths),
_current_file(nullptr),
_current_file_idx(0){
// do nothing
}
// Releases the file stream that is currently held, if any.
CsvScanner::~CsvScanner() {
    // Nothing to release when no file stream is held.
    if (_current_file == nullptr) {
        return;
    }

    // Close the stream explicitly before destroying it.
    if (_current_file->is_open()) {
        _current_file->close();
    }

    delete _current_file;
    _current_file = nullptr;
}
// Marks the scanner as open.
// Returns Status::OK if the scanner was already open or has at least one
// file path; returns an error status when the path list is empty.
// No file is opened here; files are opened lazily by get_next_row().
Status CsvScanner::open() {
    VLOG(1) << "CsvScanner::Connect";

    if (!_is_open) {
        // First call: refuse to open a scanner that has nothing to read.
        if (_file_paths.empty()) {
            return Status("no file specified.");
        }
        _is_open = true;
        return Status::OK;
    }

    // Opening twice is harmless; just report it.
    LOG(INFO) << "this scanner already opened";
    return Status::OK;
}
// TODO(lingbin): read more than one line at a time to reduce IO consumption
//
// Reads the next line from the configured CSV files, advancing to the next
// file whenever the current one is exhausted.
//
// @param line_str  receives the line that was read (without the delimiter);
//                  cleared when *eos is set to true.
// @param eos       set to true only when every file has been consumed and
//                  no line was produced by this call; false otherwise.
// @return Status::OK on success (including end of stream); an error status
//         if a file cannot be opened or a read fails before end of file.
Status CsvScanner::get_next_row(std::string* line_str, bool* eos) {
    *eos = false;

    while (true) {
        if (_current_file == nullptr) {
            // No file is held: either everything is consumed or the next
            // file has to be opened.
            if (_current_file_idx >= _file_paths.size()) {
                line_str->clear();
                *eos = true;
                return Status::OK;
            }

            const std::string& file_path = _file_paths[_current_file_idx];
            LOG(INFO) << "open csv file: [" << _current_file_idx << "] " << file_path;
            _current_file = new std::ifstream(file_path, std::ifstream::in);
            if (!_current_file->is_open()) {
                // Drop the unusable stream so later calls do not keep
                // reading from a stream that was never opened.
                delete _current_file;
                _current_file = nullptr;
                return Status("Fail to read csv file: " + file_path);
            }
            ++_current_file_idx;
        }

        // A successful extraction is a real row, even when it also reached
        // end of file (last line without a trailing newline). The EOF state
        // is handled by the next call.
        if (std::getline(*_current_file, *line_str)) {
            return Status::OK;
        }

        // Nothing was extracted: the file is exhausted or the read failed.
        const bool reached_end = _current_file->eof();
        _current_file->close();
        delete _current_file;
        _current_file = nullptr;

        if (!reached_end) {
            // _current_file_idx was already advanced past the file that
            // was being read.
            return Status("Fail to read csv file: " + _file_paths[_current_file_idx - 1]);
        }
        // Otherwise loop to open the next file or report end of stream.
    }
}
} // end namespace palo