Skip to content

Commit

Permalink
[fix](statistics) Fix bug where an empty table keeps being auto analyzed. (#40811)
Browse files Browse the repository at this point in the history
1. Fix the bug where an empty table keeps being auto analyzed: reset the new
partition flag regardless of whether the row count is 0.
2. Use a reported row count of -1 (row count not yet reported) when checking whether a table is empty.
  • Loading branch information
Jibing-Li committed Sep 19, 2024
1 parent 79a282a commit 4744e27
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public class AnalysisInfoBuilder {
private boolean usingSqlForPartitionColumn;
private long tblUpdateTime;
private boolean emptyJob;
private boolean userInject;
private boolean userInject = false;
private long rowCount;

public AnalysisInfoBuilder() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.apache.doris.common.io.Writable;
import org.apache.doris.persist.gson.GsonPostProcessable;
import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
import org.apache.doris.statistics.AnalysisInfo.JobType;
import org.apache.doris.statistics.util.StatisticsUtil;

Expand Down Expand Up @@ -167,7 +166,9 @@ public void reset() {

public void update(AnalysisInfo analyzedJob, TableIf tableIf) {
updatedTime = analyzedJob.tblUpdateTime;
userInjected = analyzedJob.userInject;
if (analyzedJob.userInject) {
userInjected = true;
}
String colNameStr = analyzedJob.colName;
// colName field AnalyzeJob's format likes: "[col1, col2]", we need to remove brackets here
// TODO: Refactor this later
Expand Down Expand Up @@ -195,16 +196,17 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) {
indexesRowCount.putAll(analyzedJob.indexesRowCount);
clearStaleIndexRowCount((OlapTable) tableIf);
}
if (analyzedJob.emptyJob && AnalysisMethod.SAMPLE.equals(analyzedJob.analysisMethod)) {
return;
}
if (analyzedJob.colToPartitions.keySet()
.containsAll(tableIf.getBaseSchema().stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName).collect(Collectors.toSet()))) {
updatedRows.set(0);
newPartitionLoaded.set(false);
}
// Set userInject back to false after manual analyze.
if (JobType.MANUAL.equals(jobType) && !analyzedJob.userInject) {
userInjected = false;
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -976,37 +976,26 @@ public static boolean isMvColumn(TableIf table, String columnName) {
}

public static boolean isEmptyTable(TableIf table, AnalysisInfo.AnalysisMethod method) {
int waitRowCountReportedTime = 90;
int waitRowCountReportedTime = 120;
if (!(table instanceof OlapTable) || method.equals(AnalysisInfo.AnalysisMethod.FULL)) {
return false;
}
OlapTable olapTable = (OlapTable) table;
long rowCount = 0;
for (int i = 0; i < waitRowCountReportedTime; i++) {
if (olapTable.getRowCount() > 0) {
return false;
}
boolean allInitVersion = true;
// If all partitions' visible version are PARTITION_INIT_VERSION, return true.
// If any partition's visible version is greater than 2, return true.
// Otherwise, wait row count to be reported.
for (Partition p : olapTable.getPartitions()) {
if (p.getVisibleVersion() != Partition.PARTITION_INIT_VERSION) {
allInitVersion = false;
rowCount = olapTable.getRowCountForIndex(olapTable.getBaseIndexId(), true);
// rowCount == -1 means new table or first load row count not fully reported, need to wait.
if (rowCount == -1) {
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
LOG.info("Sleep interrupted.");
}
if (p.getVisibleVersion() > Partition.PARTITION_INIT_VERSION + 1) {
return true;
}
}
if (allInitVersion) {
return true;
}
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
LOG.info("Sleep interrupted.", e);
continue;
}
break;
}
return true;
return rowCount == 0;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,54 @@ suite("test_drop_stats_and_truncate") {
columns = all_columns.split(",");
assertEquals(9, columns.size())

sql """drop table part"""
sql """CREATE TABLE `part` (
`id` INT NULL,
`colint` INT NULL,
`coltinyint` tinyint NULL,
`colsmallint` smallINT NULL,
`colbigint` bigINT NULL,
`collargeint` largeINT NULL,
`colfloat` float NULL,
`coldouble` double NULL,
`coldecimal` decimal(27, 9) NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT 'OLAP'
PARTITION BY RANGE(`id`)
(
PARTITION p1 VALUES [("-2147483648"), ("10000")),
PARTITION p2 VALUES [("10000"), ("20000")),
PARTITION p3 VALUES [("20000"), ("30000"))
)
DISTRIBUTED BY HASH(`id`) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
)
"""
sql """analyze table part with sync"""
sql """Insert into part values (1, 1, 1, 1, 1, 1, 1.1, 1.1, 1.1)"""
result = sql """show table stats part"""
assertEquals("true", result[0][6])
sql """truncate table part partition(p1)"""
result = sql """show table stats part"""
assertEquals("true", result[0][6])
sql """analyze table part with sample rows 100 with sync"""
result = sql """show table stats part"""
// Column 6 of `show table stats` may still read "true" right after the sample analyze
// (presumably because the row count report has not arrived yet -- see the log message).
// Retry the analyze up to two more times, logging the index stats each time for diagnosis.
for (int retry = 0; retry < 2 && result[0][6].equals("true"); retry++) {
    // Actually run the statement: without the `sql` call only the query text would be logged.
    def indexStats = sql """show index stats part part"""
    logger.info("Report not ready. index stats: " + indexStats)
    sql """analyze table part with sample rows 100 with sync"""
    result = sql """show table stats part"""
}
assertEquals("false", result[0][6])

sql """drop database if exists test_drop_stats_and_truncate"""
}

0 comments on commit 4744e27

Please sign in to comment.