fix csv_scan_node bug (#115)

This commit is contained in:
LingBin
2017-09-21 20:48:58 +08:00
committed by morningman
parent adb3213314
commit 41226344ce

View File

@ -171,8 +171,8 @@ Status CsvScanNode::prepare(RuntimeState* state) {
}
_hll_column_num++;
}
// NOTE: not all the columns in '_column' is exist in table schema
// NOTE: not all the columns in '_columns' is exist in table schema
if (_columns.end() != std::find(_columns.begin(), _columns.end(), column_name)) {
_column_slot_map[column_name] = slot;
} else {
@ -185,8 +185,6 @@ Status CsvScanNode::prepare(RuntimeState* state) {
_unspecified_columns.end(),
column_name)) {
_column_slot_map[column_name] = slot;
} else {
_column_slot_map[column_name] = nullptr;
}
}
@ -204,7 +202,6 @@ Status CsvScanNode::prepare(RuntimeState* state) {
const std::string& column_name = _unspecified_columns[i];
SlotDescriptor* slot = _column_slot_map[column_name];
_unspecified_colomn_slot_vec.push_back(slot);
_unspecified_colomn_type_vec.push_back(_column_type_map[column_name]);
}
@ -571,12 +568,12 @@ bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state)
if (!slot->is_materialized()) {
continue;
}
if (slot->type().type == TYPE_HLL) {
continue;
}
}
const TColumnType& column_type = _column_type_vec[i];
const TColumnType& column_type = _column_type_vec[i];
bool flag = check_and_write_text_slot(
column_name, column_type,
fields[i].c_str(),
@ -602,9 +599,9 @@ bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state)
if (slot->type().type == TYPE_HLL) {
continue;
}
const TColumnType& column_type = _unspecified_colomn_type_vec[i];
}
const TColumnType& column_type = _unspecified_colomn_type_vec[i];
bool flag = check_and_write_text_slot(
column_name, column_type,
_default_values[i].c_str(),
@ -624,7 +621,7 @@ bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state)
const std::string& column_name = iter->first;
const SlotDescriptor* slot = _column_slot_map[column_name];
const TColumnType& column_type = _column_type_map[column_name];
std::string column_string = "";
std::string column_string = "";
const char* src = fields[function.param_column_index].c_str();
int src_column_len = fields[function.param_column_index].length();
hll_hash(src, src_column_len, &column_string);
@ -632,12 +629,12 @@ bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state)
column_name, column_type,
column_string.c_str(),
column_string.length(),
slot, state, &error_msg);
slot, state, &error_msg);
if (flag == false) {
_runtime_state->append_error_msg_to_file(line, error_msg.str());
return false;
}
}
}
}
return true;
}
@ -650,12 +647,12 @@ bool CsvScanNode::check_hll_function(TMiniLoadEtlFunction& function) {
}
return true;
}
void CsvScanNode::hll_hash(const char* src, int len, std::string* result) {
std::string str(src, len);
if (str != "\\N") {
uint64_t hash = HashUtil::murmur_hash64A(src, len, HashUtil::MURMUR_SEED);
char buf[11];
char buf[11];
memset(buf, 0, 11);
// explicit set
buf[0] = HLL_DATA_EXPLICIT;
@ -664,11 +661,11 @@ void CsvScanNode::hll_hash(const char* src, int len, std::string* result) {
*result = std::string(buf, 11);
} else {
char buf[2];
memset(buf, 0, 2);
memset(buf, 0, 2);
// empty set
buf[0] = HLL_DATA_EMPTY;
*result = std::string(buf, 2);
}
}
}
} // end namespace palo