Skip to content

Commit 5928a9e

Browse files
liyafan82 authored and kszucs committed
ARROW-7073: [Java] Support concating vectors values in batch
We need a way to copy vector values in batch. Currently, we have copyFrom and copyFromSafe APIs. However, they are not enough, as copying values individually is not performant. Closes #5916 from liyafan82/fly_1125_veccat and squashes the following commits: 94b407c <liyafan82> Support dense union vector ee49dc6 <liyafan82> Add tests with null values ad33e23 <liyafan82> Rewrite tests with vector populator for result verification c89211a <liyafan82> Rewrite tests with vector populator and provide static utility 7c13ede <liyafan82> Support concating vectors values in batch Authored-by: liyafan82 <fan_li_ya@foxmail.com> Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
1 parent 3f0c41a commit 5928a9e

File tree

6 files changed

+863
-5
lines changed

6 files changed

+863
-5
lines changed

java/vector/src/main/codegen/templates/UnionVector.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -553,9 +553,13 @@ public Iterator<ValueVector> iterator() {
553553
return vectors.iterator();
554554
}
555555

556-
public ValueVector getVector(int index) {
557-
int type = typeBuffer.getByte(index * TYPE_WIDTH);
558-
switch (MinorType.values()[type]) {
556+
public ValueVector getVector(int index) {
557+
int type = typeBuffer.getByte(index * TYPE_WIDTH);
558+
return getVectorByType(type);
559+
}
560+
561+
public ValueVector getVectorByType(int typeId) {
562+
switch (MinorType.values()[typeId]) {
559563
case NULL:
560564
return null;
561565
<#list vv.types as type>
@@ -574,7 +578,7 @@ public ValueVector getVector(int index) {
574578
case LIST:
575579
return getList();
576580
default:
577-
throw new UnsupportedOperationException("Cannot support type: " + MinorType.values()[type]);
581+
throw new UnsupportedOperationException("Cannot support type: " + MinorType.values()[typeId]);
578582
}
579583
}
580584

Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.arrow.vector.util;
19+
20+
import java.util.HashSet;
21+
22+
import org.apache.arrow.util.Preconditions;
23+
import org.apache.arrow.vector.BaseFixedWidthVector;
24+
import org.apache.arrow.vector.BaseVariableWidthVector;
25+
import org.apache.arrow.vector.BitVectorHelper;
26+
import org.apache.arrow.vector.NullVector;
27+
import org.apache.arrow.vector.ValueVector;
28+
import org.apache.arrow.vector.compare.TypeEqualsVisitor;
29+
import org.apache.arrow.vector.compare.VectorVisitor;
30+
import org.apache.arrow.vector.complex.DenseUnionVector;
31+
import org.apache.arrow.vector.complex.FixedSizeListVector;
32+
import org.apache.arrow.vector.complex.ListVector;
33+
import org.apache.arrow.vector.complex.NonNullableStructVector;
34+
import org.apache.arrow.vector.complex.UnionVector;
35+
36+
import io.netty.util.internal.PlatformDependent;
37+
38+
/**
39+
* Utility to append two vectors together.
40+
*/
41+
class VectorAppender implements VectorVisitor<ValueVector, Void> {
42+
43+
/**
44+
* The targetVector to be appended.
45+
*/
46+
private final ValueVector targetVector;
47+
48+
private final TypeEqualsVisitor typeVisitor;
49+
50+
/**
51+
* Constructs a new targetVector appender, with the given targetVector.
52+
* @param targetVector the targetVector to be appended.
53+
*/
54+
VectorAppender(ValueVector targetVector) {
55+
this.targetVector = targetVector;
56+
typeVisitor = new TypeEqualsVisitor(targetVector, false, true);
57+
}
58+
59+
@Override
60+
public ValueVector visit(BaseFixedWidthVector deltaVector, Void value) {
61+
Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),
62+
"The targetVector to append must have the same type as the targetVector being appended");
63+
64+
int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
65+
66+
// make sure there is enough capacity
67+
while (targetVector.getValueCapacity() < newValueCount) {
68+
targetVector.reAlloc();
69+
}
70+
71+
// append validity buffer
72+
BitVectorHelper.concatBits(
73+
targetVector.getValidityBuffer(), targetVector.getValueCount(),
74+
deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
75+
76+
// append data buffer
77+
PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(),
78+
targetVector.getDataBuffer().memoryAddress() + deltaVector.getTypeWidth() * targetVector.getValueCount(),
79+
deltaVector.getTypeWidth() * deltaVector.getValueCount());
80+
targetVector.setValueCount(newValueCount);
81+
return targetVector;
82+
}
83+
84+
@Override
85+
public ValueVector visit(BaseVariableWidthVector deltaVector, Void value) {
86+
Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),
87+
"The targetVector to append must have the same type as the targetVector being appended");
88+
89+
int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
90+
91+
int targetDataSize = targetVector.getOffsetBuffer().getInt(
92+
targetVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
93+
int deltaDataSize = deltaVector.getOffsetBuffer().getInt(
94+
deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
95+
int newValueCapacity = targetDataSize + deltaDataSize;
96+
97+
// make sure there is enough capacity
98+
while (targetVector.getValueCapacity() < newValueCount) {
99+
targetVector.reAlloc();
100+
}
101+
while (targetVector.getDataBuffer().capacity() < newValueCapacity) {
102+
((BaseVariableWidthVector) targetVector).reallocDataBuffer();
103+
}
104+
105+
// append validity buffer
106+
BitVectorHelper.concatBits(
107+
targetVector.getValidityBuffer(), targetVector.getValueCount(),
108+
deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
109+
110+
// append data buffer
111+
PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(),
112+
targetVector.getDataBuffer().memoryAddress() + targetDataSize, deltaDataSize);
113+
114+
// copy offset buffer
115+
PlatformDependent.copyMemory(
116+
deltaVector.getOffsetBuffer().memoryAddress() + BaseVariableWidthVector.OFFSET_WIDTH,
117+
targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) *
118+
BaseVariableWidthVector.OFFSET_WIDTH,
119+
deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
120+
121+
// increase each offset from the second buffer
122+
for (int i = 0; i < deltaVector.getValueCount(); i++) {
123+
int oldOffset = targetVector.getOffsetBuffer().getInt((targetVector.getValueCount() + 1 + i) *
124+
BaseVariableWidthVector.OFFSET_WIDTH);
125+
targetVector.getOffsetBuffer().setInt(
126+
(targetVector.getValueCount() + 1 + i) *
127+
BaseVariableWidthVector.OFFSET_WIDTH, oldOffset + targetDataSize);
128+
}
129+
((BaseVariableWidthVector) targetVector).setLastSet(newValueCount - 1);
130+
targetVector.setValueCount(newValueCount);
131+
return targetVector;
132+
}
133+
134+
@Override
135+
public ValueVector visit(ListVector deltaVector, Void value) {
136+
Preconditions.checkArgument(typeVisitor.equals(deltaVector),
137+
"The targetVector to append must have the same type as the targetVector being appended");
138+
139+
int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
140+
141+
int targetListSize = targetVector.getOffsetBuffer().getInt(
142+
targetVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
143+
int deltaListSize = deltaVector.getOffsetBuffer().getInt(
144+
deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
145+
146+
ListVector targetListVector = (ListVector) targetVector;
147+
148+
// make sure the underlying vector has value count set
149+
targetListVector.getDataVector().setValueCount(targetListSize);
150+
deltaVector.getDataVector().setValueCount(deltaListSize);
151+
152+
// make sure there is enough capacity
153+
while (targetVector.getValueCapacity() < newValueCount) {
154+
targetVector.reAlloc();
155+
}
156+
157+
// append validity buffer
158+
BitVectorHelper.concatBits(
159+
targetVector.getValidityBuffer(), targetVector.getValueCount(),
160+
deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
161+
162+
// append offset buffer
163+
PlatformDependent.copyMemory(deltaVector.getOffsetBuffer().memoryAddress() + ListVector.OFFSET_WIDTH,
164+
targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) *
165+
ListVector.OFFSET_WIDTH,
166+
deltaVector.getValueCount() * ListVector.OFFSET_WIDTH);
167+
168+
// increase each offset from the second buffer
169+
for (int i = 0; i < deltaVector.getValueCount(); i++) {
170+
int oldOffset =
171+
targetVector.getOffsetBuffer().getInt((targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH);
172+
targetVector.getOffsetBuffer().setInt((targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH,
173+
oldOffset + targetListSize);
174+
}
175+
targetListVector.setLastSet(newValueCount - 1);
176+
177+
// append underlying vectors
178+
VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector());
179+
deltaVector.getDataVector().accept(innerAppender, null);
180+
181+
targetVector.setValueCount(newValueCount);
182+
return targetVector;
183+
}
184+
185+
@Override
186+
public ValueVector visit(FixedSizeListVector deltaVector, Void value) {
187+
Preconditions.checkArgument(typeVisitor.equals(deltaVector),
188+
"The vector to append must have the same type as the targetVector being appended");
189+
190+
FixedSizeListVector targetListVector = (FixedSizeListVector) targetVector;
191+
192+
Preconditions.checkArgument(targetListVector.getListSize() == deltaVector.getListSize(),
193+
"FixedSizeListVector must have the same list size to append");
194+
195+
int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
196+
197+
int targetListSize = targetListVector.getValueCount() * targetListVector.getListSize();
198+
int deltaListSize = deltaVector.getValueCount() * deltaVector.getListSize();
199+
200+
// make sure the underlying vector has value count set
201+
targetListVector.getDataVector().setValueCount(targetListSize);
202+
deltaVector.getDataVector().setValueCount(deltaListSize);
203+
204+
// make sure there is enough capacity
205+
while (targetVector.getValueCapacity() < newValueCount) {
206+
targetVector.reAlloc();
207+
}
208+
209+
// append validity buffer
210+
BitVectorHelper.concatBits(
211+
targetVector.getValidityBuffer(), targetVector.getValueCount(),
212+
deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
213+
214+
// append underlying vectors
215+
VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector());
216+
deltaVector.getDataVector().accept(innerAppender, null);
217+
218+
targetVector.setValueCount(newValueCount);
219+
return targetVector;
220+
}
221+
222+
@Override
223+
public ValueVector visit(NonNullableStructVector deltaVector, Void value) {
224+
Preconditions.checkArgument(typeVisitor.equals(deltaVector),
225+
"The vector to append must have the same type as the targetVector being appended");
226+
227+
NonNullableStructVector targetStructVector = (NonNullableStructVector) targetVector;
228+
int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
229+
230+
// make sure there is enough capacity
231+
while (targetVector.getValueCapacity() < newValueCount) {
232+
targetVector.reAlloc();
233+
}
234+
235+
// append validity buffer
236+
BitVectorHelper.concatBits(
237+
targetVector.getValidityBuffer(), targetVector.getValueCount(),
238+
deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
239+
240+
// append child vectors
241+
for (int i = 0; i < targetStructVector.getChildrenFromFields().size(); i++) {
242+
ValueVector targetChild = targetStructVector.getVectorById(i);
243+
ValueVector deltaChild = deltaVector.getVectorById(i);
244+
245+
targetChild.setValueCount(targetStructVector.getValueCount());
246+
deltaChild.setValueCount(deltaVector.getValueCount());
247+
248+
VectorAppender innerAppender = new VectorAppender(targetChild);
249+
deltaChild.accept(innerAppender, null);
250+
}
251+
252+
targetVector.setValueCount(newValueCount);
253+
return targetVector;
254+
}
255+
256+
@Override
257+
public ValueVector visit(UnionVector deltaVector, Void value) {
258+
// we only make sure that both vectors are union vectors.
259+
Preconditions.checkArgument(targetVector.getMinorType() == deltaVector.getMinorType(),
260+
"The vector to append must have the same type as the targetVector being appended");
261+
262+
UnionVector targetUnionVector = (UnionVector) targetVector;
263+
int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
264+
265+
// make sure there is enough capacity
266+
while (targetUnionVector.getValueCapacity() < newValueCount) {
267+
targetUnionVector.reAlloc();
268+
}
269+
270+
// append type buffers
271+
PlatformDependent.copyMemory(deltaVector.getValidityBufferAddress(),
272+
targetUnionVector.getValidityBufferAddress() + targetVector.getValueCount(),
273+
deltaVector.getValueCount());
274+
275+
// build the hash set for all types
276+
HashSet<Integer> targetTypes = new HashSet<>();
277+
for (int i = 0; i < targetUnionVector.getValueCount(); i++) {
278+
targetTypes.add((int) targetUnionVector.getValidityBuffer().getByte(i));
279+
}
280+
HashSet<Integer> deltaTypes = new HashSet<>();
281+
for (int i = 0; i < deltaVector.getValueCount(); i++) {
282+
deltaTypes.add((int) deltaVector.getValidityBuffer().getByte(i));
283+
}
284+
285+
// append child vectors
286+
for (int i = 0; i < Byte.MAX_VALUE; i++) {
287+
if (targetTypes.contains(i) || deltaTypes.contains(i)) {
288+
ValueVector targetChild = targetUnionVector.getVectorByType(i);
289+
if (!targetTypes.contains(i)) {
290+
// if the vector type does not exist in the target, it must be newly created
291+
// and we must make sure it has enough capacity.
292+
while (targetChild.getValueCapacity() < newValueCount) {
293+
targetChild.reAlloc();
294+
}
295+
}
296+
297+
if (deltaTypes.contains(i)) {
298+
// append child vectors
299+
ValueVector deltaChild = deltaVector.getVectorByType(i);
300+
301+
targetChild.setValueCount(targetUnionVector.getValueCount());
302+
deltaChild.setValueCount(deltaVector.getValueCount());
303+
304+
VectorAppender innerAppender = new VectorAppender(targetChild);
305+
deltaChild.accept(innerAppender, null);
306+
}
307+
targetChild.setValueCount(newValueCount);
308+
}
309+
}
310+
311+
targetVector.setValueCount(newValueCount);
312+
return targetVector;
313+
}
314+
315+
@Override
316+
public ValueVector visit(DenseUnionVector left, Void value) {
317+
throw new UnsupportedOperationException();
318+
}
319+
320+
@Override
321+
public ValueVector visit(NullVector deltaVector, Void value) {
322+
Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),
323+
"The targetVector to append must have the same type as the targetVector being appended");
324+
return targetVector;
325+
}
326+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.arrow.vector.util;
19+
20+
import org.apache.arrow.vector.ValueVector;
21+
22+
/**
23+
* Utility to add vector values in batch.
24+
*/
25+
public class VectorBatchAppender {
26+
27+
/**
28+
* Add value vectors in batch.
29+
* @param targetVector the target vector.
30+
* @param vectorsToAppend the vectors to append.
31+
* @param <V> the vector type.
32+
*/
33+
public static <V extends ValueVector> void batchAppend(V targetVector, V... vectorsToAppend) {
34+
VectorAppender appender = new VectorAppender(targetVector);
35+
for (V delta : vectorsToAppend) {
36+
delta.accept(appender, null);
37+
}
38+
}
39+
}

0 commit comments

Comments
 (0)