Skip to content

Commit ffbc0fd

Browse files
Jibing-LiFreeOnePlus
authored andcommitted
[fix](new-scan)Fix new scanner load job bugs (apache#12903)
Fix bugs: 1. Fe need to send file format (e.g. parquet, orc ...) to be while processing load jobs using new scanner. 2. Try to get parquet file column type from SchemaElement.type before getting from Logical type and Converted type.
1 parent 1c1e688 commit ffbc0fd

File tree

3 files changed

+31
-2
lines changed

3 files changed

+31
-2
lines changed

be/src/vec/exec/format/parquet/schema_desc.cpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,37 @@ void FieldDescriptor::parse_physical_field(const tparquet::SchemaElement& physic
152152
physical_field->physical_type = physical_schema.type;
153153
_physical_fields.push_back(physical_field);
154154
physical_field->physical_column_index = _physical_fields.size() - 1;
155+
physical_field->type = get_doris_type(physical_schema);
156+
}
157+
158+
TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& physical_schema) {
159+
TypeDescriptor type;
160+
switch (physical_schema.type) {
161+
case tparquet::Type::BOOLEAN:
162+
type.type = TYPE_BOOLEAN;
163+
return type;
164+
case tparquet::Type::INT32:
165+
type.type = TYPE_INT;
166+
return type;
167+
case tparquet::Type::INT64:
168+
case tparquet::Type::INT96:
169+
type.type = TYPE_BIGINT;
170+
return type;
171+
case tparquet::Type::FLOAT:
172+
type.type = TYPE_FLOAT;
173+
return type;
174+
case tparquet::Type::DOUBLE:
175+
type.type = TYPE_DOUBLE;
176+
return type;
177+
default:
178+
break;
179+
}
155180
if (physical_schema.__isset.logicalType) {
156-
physical_field->type = convert_to_doris_type(physical_schema.logicalType);
181+
type = convert_to_doris_type(physical_schema.logicalType);
157182
} else if (physical_schema.__isset.converted_type) {
158-
physical_field->type = convert_to_doris_type(physical_schema.converted_type);
183+
type = convert_to_doris_type(physical_schema.converted_type);
159184
}
185+
return type;
160186
}
161187

162188
TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logicalType) {

be/src/vec/exec/format/parquet/schema_desc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ class FieldDescriptor {
8282

8383
TypeDescriptor convert_to_doris_type(tparquet::ConvertedType::type convertedType);
8484

85+
TypeDescriptor get_doris_type(const tparquet::SchemaElement& physical_schema);
86+
8587
public:
8688
FieldDescriptor() = default;
8789
~FieldDescriptor() = default;

fe/fe-core/src/main/java/org/apache/doris/planner/external/LoadScanProvider.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ public ParamCreateContext createContext(Analyzer analyzer) throws UserException
9393
ctx.timezone = analyzer.getTimezone();
9494

9595
TFileScanRangeParams params = new TFileScanRangeParams();
96+
params.format_type = formatType(fileGroupInfo.getFileGroup().getFileFormat(), "");
9697
params.setStrictMode(fileGroupInfo.isStrictMode());
9798
params.setProperties(fileGroupInfo.getBrokerDesc().getProperties());
9899
if (fileGroupInfo.getBrokerDesc().getFileType() == TFileType.FILE_HDFS) {

0 commit comments

Comments
 (0)