From 14092fdae147c122d913db5efa82ad7618f0f438 Mon Sep 17 00:00:00 2001 From: Huaxiang Sun Date: Thu, 10 Apr 2025 19:13:21 -0700 Subject: [PATCH 1/2] Minor: Set the global configured column stats enable flag to default #3188 --- .../java/org/apache/parquet/column/ParquetProperties.java | 1 + .../java/org/apache/parquet/hadoop/TestParquetWriter.java | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java index 9aaef4b3cf..cb5931581f 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java @@ -708,6 +708,7 @@ public Builder withStatisticsEnabled(String columnPath, boolean enabled) { } public Builder withStatisticsEnabled(boolean enabled) { + this.statistics.withDefaultValue(enabled); this.statisticsEnabled = enabled; return this; } diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java index c8e8f71a91..d48eaf4581 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java @@ -543,7 +543,7 @@ private void testParquetFileNumberOfBlocks( } @Test - public void testSizeStatisticsControl() throws Exception { + public void testSizeStatisticsAndStatisticsControl() throws Exception { MessageType schema = Types.buildMessage() .required(BINARY) .named("string_field") @@ -568,6 +568,7 @@ public void testSizeStatisticsControl() throws Exception { try (ParquetWriter writer = ExampleParquetWriter.builder(path) .withType(schema) .withSizeStatisticsEnabled(false) + .withStatisticsEnabled(false) // Disable column statistics globally .build()) { writer.write(group); } @@ 
-576,6 +577,7 @@ public void testSizeStatisticsControl() throws Exception { // Verify size statistics are disabled globally for (BlockMetaData block : reader.getFooter().getBlocks()) { for (ColumnChunkMetaData column : block.getColumns()) { + assertTrue(column.getStatistics().isEmpty()); // Make sure there is no column statistics assertNull(column.getSizeStatistics()); } } @@ -589,6 +591,7 @@ public void testSizeStatisticsControl() throws Exception { .withType(schema) .withSizeStatisticsEnabled(true) // enable globally .withSizeStatisticsEnabled("boolean_field", false) // disable for specific column + .withStatisticsEnabled("boolean_field", false) // disable column statistics .build()) { writer.write(group); } @@ -599,8 +602,10 @@ public void testSizeStatisticsControl() throws Exception { for (ColumnChunkMetaData column : block.getColumns()) { if (column.getPath().toDotString().equals("boolean_field")) { assertNull(column.getSizeStatistics()); + assertTrue(column.getStatistics().isEmpty()); } else { assertTrue(column.getSizeStatistics().isValid()); + assertFalse(column.getStatistics().isEmpty()); } } } From 4369866e63c23abc8db12f86f2e5fa66a8517eb7 Mon Sep 17 00:00:00 2001 From: Huaxiang Sun Date: Thu, 10 Apr 2025 21:07:10 -0700 Subject: [PATCH 2/2] Address format complaint --- .../java/org/apache/parquet/hadoop/TestParquetWriter.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java index d48eaf4581..739aa85d2c 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java @@ -568,7 +568,7 @@ public void testSizeStatisticsAndStatisticsControl() throws Exception { try (ParquetWriter writer = ExampleParquetWriter.builder(path) .withType(schema) .withSizeStatisticsEnabled(false) - 
.withStatisticsEnabled(false) // Disable column statistics globally + .withStatisticsEnabled(false) // Disable column statistics globally .build()) { writer.write(group); } @@ -577,7 +577,7 @@ public void testSizeStatisticsAndStatisticsControl() throws Exception { // Verify size statistics are disabled globally for (BlockMetaData block : reader.getFooter().getBlocks()) { for (ColumnChunkMetaData column : block.getColumns()) { - assertTrue(column.getStatistics().isEmpty()); // Make sure there is no column statistics + assertTrue(column.getStatistics().isEmpty()); // Make sure there is no column statistics assertNull(column.getSizeStatistics()); } } @@ -591,7 +591,7 @@ public void testSizeStatisticsAndStatisticsControl() throws Exception { .withType(schema) .withSizeStatisticsEnabled(true) // enable globally .withSizeStatisticsEnabled("boolean_field", false) // disable for specific column - .withStatisticsEnabled("boolean_field", false) // disable column statistics + .withStatisticsEnabled("boolean_field", false) // disable column statistics .build()) { writer.write(group); }