Skip to content

Commit f12c18e

Browse files
committed
ARROW-13899: [Ruby] Implement slicer by compute kernels
Closes #11083 from kou/ruby-slicer-expression Authored-by: Sutou Kouhei <kou@clear-code.com> Signed-off-by: Sutou Kouhei <kou@clear-code.com>
1 parent a49048b commit f12c18e

File tree

8 files changed

+301
-149
lines changed

8 files changed

+301
-149
lines changed

c_glib/arrow-glib/compute.cpp

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,9 @@ G_BEGIN_DECLS
165165
* #GArrowSortOptions is a class to customize the `sort_indices`
166166
* function.
167167
*
168+
* #GArrowSetLookupOptions is a class to customize the `is_in` function
169+
* and `index_in` function.
170+
*
168171
* There are many functions to compute data on an array.
169172
*/
170173

@@ -2417,6 +2420,157 @@ garrow_sort_options_set_sort_keys(GArrowSortOptions *options,
24172420
}
24182421

24192422

2423+
typedef struct GArrowSetLookupOptionsPrivate_ {
2424+
GArrowDatum *value_set;
2425+
} GArrowSetLookupOptionsPrivate;
2426+
2427+
/* GObject property IDs of GArrowSetLookupOptions (0 is not used). */
enum {
  PROP_SET_LOOKUP_OPTIONS_VALUE_SET = 1,
  PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS = 2,
};
2431+
2432+
/* Registers GArrowSetLookupOptions as a GArrowFunctionOptions subclass
 * that carries a GArrowSetLookupOptionsPrivate per instance. */
G_DEFINE_TYPE_WITH_PRIVATE(
  GArrowSetLookupOptions,
  garrow_set_lookup_options,
  GARROW_TYPE_FUNCTION_OPTIONS)

/* Returns the GArrowSetLookupOptionsPrivate of `obj`. */
#define GARROW_SET_LOOKUP_OPTIONS_GET_PRIVATE(obj)                      \
  static_cast<GArrowSetLookupOptionsPrivate *>(                         \
    garrow_set_lookup_options_get_instance_private(                     \
      GARROW_SET_LOOKUP_OPTIONS(obj)))
2440+
2441+
static void
2442+
garrow_set_lookup_options_dispose(GObject *object)
2443+
{
2444+
auto priv = GARROW_SET_LOOKUP_OPTIONS_GET_PRIVATE(object);
2445+
2446+
if (priv->value_set) {
2447+
g_object_unref(priv->value_set);
2448+
priv->value_set = NULL;
2449+
}
2450+
2451+
G_OBJECT_CLASS(garrow_set_lookup_options_parent_class)->dispose(object);
2452+
}
2453+
2454+
static void
2455+
garrow_set_lookup_options_set_property(GObject *object,
2456+
guint prop_id,
2457+
const GValue *value,
2458+
GParamSpec *pspec)
2459+
{
2460+
auto priv = GARROW_SET_LOOKUP_OPTIONS_GET_PRIVATE(object);
2461+
auto options =
2462+
garrow_set_lookup_options_get_raw(GARROW_SET_LOOKUP_OPTIONS(object));
2463+
2464+
switch (prop_id) {
2465+
case PROP_SET_LOOKUP_OPTIONS_VALUE_SET:
2466+
priv->value_set = GARROW_DATUM(g_value_dup_object(value));
2467+
options->value_set = garrow_datum_get_raw(priv->value_set);
2468+
break;
2469+
case PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS:
2470+
options->skip_nulls = g_value_get_boolean(value);
2471+
break;
2472+
default:
2473+
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
2474+
break;
2475+
}
2476+
}
2477+
2478+
static void
2479+
garrow_set_lookup_options_get_property(GObject *object,
2480+
guint prop_id,
2481+
GValue *value,
2482+
GParamSpec *pspec)
2483+
{
2484+
auto priv = GARROW_SET_LOOKUP_OPTIONS_GET_PRIVATE(object);
2485+
auto options =
2486+
garrow_set_lookup_options_get_raw(GARROW_SET_LOOKUP_OPTIONS(object));
2487+
2488+
switch (prop_id) {
2489+
case PROP_SET_LOOKUP_OPTIONS_VALUE_SET:
2490+
g_value_set_object(value, priv->value_set);
2491+
break;
2492+
case PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS:
2493+
g_value_set_boolean(value, options->skip_nulls);
2494+
break;
2495+
default:
2496+
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
2497+
break;
2498+
}
2499+
}
2500+
2501+
static void
2502+
garrow_set_lookup_options_init(GArrowSetLookupOptions *object)
2503+
{
2504+
auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
2505+
priv->options = static_cast<arrow::compute::FunctionOptions *>(
2506+
new arrow::compute::SetLookupOptions());
2507+
}
2508+
2509+
static void
2510+
garrow_set_lookup_options_class_init(GArrowSetLookupOptionsClass *klass)
2511+
{
2512+
auto gobject_class = G_OBJECT_CLASS(klass);
2513+
2514+
gobject_class->dispose = garrow_set_lookup_options_dispose;
2515+
gobject_class->set_property = garrow_set_lookup_options_set_property;
2516+
gobject_class->get_property = garrow_set_lookup_options_get_property;
2517+
2518+
2519+
arrow::compute::SetLookupOptions options;
2520+
2521+
GParamSpec *spec;
2522+
/**
2523+
* GArrowSetLookupOptions:value-set:
2524+
*
2525+
* The set of values to look up input values into.
2526+
*
2527+
* Since: 6.0.0
2528+
*/
2529+
spec = g_param_spec_object("value-set",
2530+
"Value set",
2531+
"The set of values to look up input values into",
2532+
GARROW_TYPE_DATUM,
2533+
static_cast<GParamFlags>(G_PARAM_READWRITE |
2534+
G_PARAM_CONSTRUCT_ONLY));
2535+
g_object_class_install_property(gobject_class,
2536+
PROP_SET_LOOKUP_OPTIONS_VALUE_SET,
2537+
spec);
2538+
2539+
/**
2540+
* GArrowSetLookupOptions:skip-nulls:
2541+
*
2542+
* Whether NULLs are skipped or not.
2543+
*
2544+
* Since: 6.0.0
2545+
*/
2546+
spec = g_param_spec_boolean("skip-nulls",
2547+
"Skip NULLs",
2548+
"Whether NULLs are skipped or not",
2549+
options.skip_nulls,
2550+
static_cast<GParamFlags>(G_PARAM_READWRITE));
2551+
g_object_class_install_property(gobject_class,
2552+
PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS,
2553+
spec);
2554+
}
2555+
2556+
/**
2557+
* garrow_set_lookup_options_new:
2558+
* @value_set: A #GArrowArrayDatum or #GArrowChunkedArrayDatum to be looked up.
2559+
*
2560+
* Returns: A newly created #GArrowSetLookupOptions.
2561+
*
2562+
* Since: 6.0.0
2563+
*/
2564+
GArrowSetLookupOptions *
2565+
garrow_set_lookup_options_new(GArrowDatum *value_set)
2566+
{
2567+
return GARROW_SET_LOOKUP_OPTIONS(
2568+
g_object_new(GARROW_TYPE_SET_LOOKUP_OPTIONS,
2569+
"value-set", value_set,
2570+
NULL));
2571+
}
2572+
2573+
24202574
/**
24212575
* garrow_array_cast:
24222576
* @array: A #GArrowArray.
@@ -3755,3 +3909,12 @@ garrow_sort_options_get_raw(GArrowSortOptions *options)
37553909
return static_cast<arrow::compute::SortOptions *>(
37563910
garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
37573911
}
3912+
3913+
/*
 * Returns the arrow::compute::SetLookupOptions wrapped by @options:
 * the parent class' raw options pointer downcast to the concrete type.
 * The returned pointer is not a copy.
 */
arrow::compute::SetLookupOptions *
garrow_set_lookup_options_get_raw(GArrowSetLookupOptions *options)
{
  auto function_options = GARROW_FUNCTION_OPTIONS(options);
  auto raw_options = garrow_function_options_get_raw(function_options);
  return static_cast<arrow::compute::SetLookupOptions *>(raw_options);
}
3919+
3920+

c_glib/arrow-glib/compute.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,22 @@ garrow_sort_options_add_sort_key(GArrowSortOptions *options,
436436
GArrowSortKey *sort_key);
437437

438438

439+
#define GARROW_TYPE_SET_LOOKUP_OPTIONS (garrow_set_lookup_options_get_type())
440+
G_DECLARE_DERIVABLE_TYPE(GArrowSetLookupOptions,
441+
garrow_set_lookup_options,
442+
GARROW,
443+
SET_LOOKUP_OPTIONS,
444+
GArrowFunctionOptions)
445+
struct _GArrowSetLookupOptionsClass
446+
{
447+
GArrowFunctionOptionsClass parent_class;
448+
};
449+
450+
/* Creates a GArrowSetLookupOptions that looks values up in value_set. */
GARROW_AVAILABLE_IN_6_0
GArrowSetLookupOptions *garrow_set_lookup_options_new(GArrowDatum *value_set);
453+
454+
439455
GArrowArray *garrow_array_cast(GArrowArray *array,
440456
GArrowDataType *target_data_type,
441457
GArrowCastOptions *options,

c_glib/arrow-glib/compute.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,7 @@ garrow_sort_key_get_raw(GArrowSortKey *sort_key);
8989

9090
arrow::compute::SortOptions *
9191
garrow_sort_options_get_raw(GArrowSortOptions *options);
92+
93+
94+
/* Returns the arrow::compute::SetLookupOptions wrapped by options. */
arrow::compute::SetLookupOptions *garrow_set_lookup_options_get_raw(
  GArrowSetLookupOptions *options);

c_glib/test/test-is-in.rb

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,16 @@ def test_null_in_both
4646
assert_equal(build_boolean_array([false, true, true, true]),
4747
left.is_in(right))
4848
end
49+
50+
# Runs the "is_in" compute function through Arrow::Function with an
# explicit Arrow::SetLookupOptions whose value set is a plain array.
def test_options
  input = Arrow::ArrayDatum.new(build_int16_array([1, 0, nil, 2]))
  value_set = Arrow::ArrayDatum.new(build_int16_array([2, 0, nil]))
  function = Arrow::Function.find("is_in")
  lookup_options = Arrow::SetLookupOptions.new(value_set)
  result = function.execute([input], lookup_options)
  expected = build_boolean_array([false, true, true, true])
  assert_equal(expected, result.value)
end
4959
end
5060

5161
sub_test_case("ChunkedArray") do
@@ -92,5 +102,19 @@ def test_null_in_both
92102
assert_equal(build_boolean_array([false, true, true, true]),
93103
left.is_in_chunked_array(right))
94104
end
105+
106+
# Runs the "is_in" compute function through Arrow::Function with an
# explicit Arrow::SetLookupOptions whose value set is a chunked array.
def test_options
  input = Arrow::ArrayDatum.new(build_int16_array([1, 0, nil, 2]))
  value_set_chunks = [[2, 0], [3, nil]].collect do |values|
    build_int16_array(values)
  end
  chunked_value_set = Arrow::ChunkedArray.new(value_set_chunks)
  function = Arrow::Function.find("is_in")
  lookup_options =
    Arrow::SetLookupOptions.new(Arrow::ChunkedArrayDatum.new(chunked_value_set))
  result = function.execute([input], lookup_options)
  expected = build_boolean_array([false, true, true, true])
  assert_equal(expected, result.value)
end
95119
end
96120
end
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Tests for Arrow::SetLookupOptions: the constructor and the
# skip_nulls accessor pair.
class TestSetLookupOptions < Test::Unit::TestCase
  include Helper::Buildable

  # The datum passed to the constructor is readable back as value_set.
  def test_new
    datum = Arrow::ArrayDatum.new(build_int8_array([1, 2, 3]))
    assert_equal(datum, Arrow::SetLookupOptions.new(datum).value_set)
  end

  sub_test_case("instance methods") do
    def setup
      datum = Arrow::ArrayDatum.new(build_int8_array([1, 2, 3]))
      @options = Arrow::SetLookupOptions.new(datum)
    end

    # skip_nulls is false on a fresh instance and can be switched on.
    def test_skip_nulls
      assert { not @options.skip_nulls? }
      @options.skip_nulls = true
      assert { @options.skip_nulls? }
    end
  end
end

ruby/red-arrow/lib/arrow/datum.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ class << self
2121
# @api private
2222
def try_convert(value)
2323
case value
24+
when Table
25+
TableDatum.new(value)
2426
when Array
2527
ArrayDatum.new(value)
2628
when ChunkedArray

0 commit comments

Comments
 (0)