Skip to content

Commit

Permalink
[improvement](statistics)Return -1 to neredis if report olap table ro…
Browse files Browse the repository at this point in the history
…w count for new table is not done for all tablets. (apache#40457)

Return -1 to neredis if report olap table row count for new table is not
done for all tablets.
After this change, nereids could know new table is empty or not. When
it's not empty but not reported yet, return -1 as row count to nereids.
  • Loading branch information
Jibing-Li committed Sep 14, 2024
1 parent 168742d commit 9cb425b
Show file tree
Hide file tree
Showing 14 changed files with 169 additions and 50 deletions.
1 change: 1 addition & 0 deletions be/src/olap/tablet_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1035,6 +1035,7 @@ void TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>*
t_tablet_stat.__set_row_count(tablet_info.row_count);
t_tablet_stat.__set_total_version_count(tablet_info.total_version_count);
t_tablet_stat.__set_visible_version_count(tablet_info.visible_version_count);
t_tablet_stat.__set_visible_version(tablet_info.version);
};
for_each_tablet(handler, filter_all_tablets);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ public class ShowTableStatsStmt extends ShowStmt {
new ImmutableList.Builder<String>()
.add("table_name")
.add("index_name")
.add("row_count")
.add("analyze_row_count")
.add("report_row_count")
.add("report_row_count_for_nereids")
.build();

private final TableName tableName;
Expand Down Expand Up @@ -167,37 +169,33 @@ public long getTableId() {
return tableId;
}

public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
public ShowResultSet constructResultSet(TableStatsMeta tableStatistic, TableIf table) {
if (indexName != null) {
return constructIndexResultSet(tableStatistic);
return constructIndexResultSet(tableStatistic, table);
}
return constructTableResultSet(tableStatistic);
return constructTableResultSet(tableStatistic, table);
}

public ShowResultSet constructEmptyResultSet() {
return new ShowResultSet(getMetaData(), new ArrayList<>());
}

public ShowResultSet constructResultSet(long rowCount) {
List<List<String>> result = Lists.newArrayList();
List<String> row = Lists.newArrayList();
row.add("");
row.add("");
row.add(String.valueOf(rowCount));
row.add("");
row.add("");
row.add("");
row.add("");
row.add("");
result.add(row);
return new ShowResultSet(getMetaData(), result);
}

public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic) {
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic, TableIf table) {
if (tableStatistic == null) {
return new ShowResultSet(getMetaData(), new ArrayList<>());
List<List<String>> result = Lists.newArrayList();
List<String> row = Lists.newArrayList();
row.add("");
row.add("");
row.add(String.valueOf(table.getCachedRowCount()));
row.add("");
row.add("");
row.add("");
row.add("");
row.add("");
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
List<List<String>> result = Lists.newArrayList();
List<String> row = Lists.newArrayList();
row.add(String.valueOf(tableStatistic.updatedRows));
Expand All @@ -216,7 +214,7 @@ public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic) {
return new ShowResultSet(getMetaData(), result);
}

public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic, TableIf table) {
List<List<String>> result = Lists.newArrayList();
if (!(table instanceof OlapTable)) {
return new ShowResultSet(getMetaData(), result);
Expand All @@ -226,14 +224,13 @@ public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
if (indexId == null) {
throw new RuntimeException(String.format("Index %s not exist.", indexName));
}
long rowCount = tableStatistic.getRowCount(olapTable.getIndexIdByName(indexName));
if (rowCount == -1) {
return new ShowResultSet(getMetaData(), result);
}
long rowCount = tableStatistic == null ? -1 : tableStatistic.getRowCount(olapTable.getIndexIdByName(indexName));
List<String> row = Lists.newArrayList();
row.add(table.getName());
row.add(indexName);
row.add(String.valueOf(rowCount));
row.add(String.valueOf(olapTable.getRowCountForIndex(indexId, false)));
row.add(String.valueOf(olapTable.getRowCountForIndex(indexId, true)));
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ public enum IndexExtState {
@SerializedName(value = "rollupFinishedVersion")
private long rollupFinishedVersion;

private boolean rowCountReported = false;

public MaterializedIndex() {
this.state = IndexState.NORMAL;
this.idToTablets = new HashMap<>();
Expand Down Expand Up @@ -206,6 +208,14 @@ public int getTabletOrderIdx(long tabletId) {
return -1;
}

public void setRowCountReported(boolean reported) {
this.rowCountReported = reported;
}

public boolean getRowCountReported() {
return this.rowCountReported;
}

@Override
public void write(DataOutput out) throws IOException {
super.write(out);
Expand Down
13 changes: 6 additions & 7 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -1396,18 +1396,17 @@ public List<Pair<String, String>> getColumnIndexPairs(Set<String> columns) {

@Override
public long fetchRowCount() {
long rowCount = 0;
for (Map.Entry<Long, Partition> entry : idToPartition.entrySet()) {
rowCount += entry.getValue().getBaseIndex().getRowCount();
}
return rowCount;
return getRowCountForIndex(baseIndexId, false);
}

public long getRowCountForIndex(long indexId) {
public long getRowCountForIndex(long indexId, boolean strict) {
long rowCount = 0;
for (Map.Entry<Long, Partition> entry : idToPartition.entrySet()) {
MaterializedIndex index = entry.getValue().getIndex(indexId);
rowCount += index == null ? 0 : index.getRowCount();
if (strict && !index.getRowCountReported()) {
return -1;
}
rowCount += (index == null || index.getRowCount() == -1) ? 0 : index.getRowCount();
}
return rowCount;
}
Expand Down
10 changes: 10 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Replica.java
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ public enum ReplicaStatus {

private long userDropTime = -1;

private long lastReportVersion = 0;

public Replica() {
}

Expand Down Expand Up @@ -811,4 +813,12 @@ public boolean isScheduleAvailable() {
return Env.getCurrentSystemInfo().checkBackendScheduleAvailable(backendId)
&& !isUserDrop();
}

public void setLastReportVersion(long version) {
this.lastReportVersion = version;
}

public long getLastReportVersion() {
return lastReportVersion;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,17 +118,41 @@ protected void runAfterCatalogReady() {
long version = partition.getVisibleVersion();
for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
long indexRowCount = 0L;
boolean indexReported = true;
for (Tablet tablet : index.getTablets()) {
long tabletRowCount = 0L;
boolean tabletReported = false;
for (Replica replica : tablet.getReplicas()) {
LOG.debug("Table {} replica {} current version {}, report version {}",
olapTable.getName(), replica.getId(),
replica.getVersion(), replica.getLastReportVersion());
if (replica.checkVersionCatchUp(version, false)
&& replica.getRowCount() > tabletRowCount) {
&& replica.getRowCount() >= tabletRowCount) {
// 1. If replica version and reported replica version are all equal to
// PARTITION_INIT_VERSION, set tabletReported to true, which indicates this
// tablet is empty for sure when previous report.
// 2. If last report version is larger than PARTITION_INIT_VERSION, set
// tabletReported to true as well. That is, we only guarantee all replicas of
// the tablet are reported for the init version.
// e.g. When replica version is 2, but last reported version is 1,
// tabletReported would be false.
if (replica.getVersion() == Partition.PARTITION_INIT_VERSION
&& replica.getLastReportVersion() == Partition.PARTITION_INIT_VERSION
|| replica.getLastReportVersion() > Partition.PARTITION_INIT_VERSION) {
tabletReported = true;
}
tabletRowCount = replica.getRowCount();
}
}
indexRowCount += tabletRowCount;
// Only when all tablets of this index are reported, we set indexReported to true.
indexReported = indexReported && tabletReported;
} // end for tablets
index.setRowCountReported(indexReported);
index.setRowCount(indexRowCount);
LOG.debug("Table {} index {} all tablets reported[{}], row count {}",
olapTable.getName(), olapTable.getIndexNameById(index.getId()),
indexReported, indexRowCount);
} // end for indices
} // end for partitions
if (LOG.isDebugEnabled()) {
Expand Down Expand Up @@ -157,6 +181,9 @@ private void updateTabletStat(Long beId, TTabletStatResult result) {
replica.setTotalVersionCount(stat.getTotalVersionCount());
replica.setVisibleVersionCount(stat.isSetVisibleVersionCount() ? stat.getVisibleVersionCount()
: stat.getTotalVersionCount());
// Older version BE doesn't set visible version. Set it to max for compatibility.
replica.setLastReportVersion(stat.isSetVisibleVersion() ? stat.getVisibleVersion()
: Long.MAX_VALUE);
}
}
}
Expand Down
11 changes: 2 additions & 9 deletions fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
Original file line number Diff line number Diff line change
Expand Up @@ -2520,19 +2520,12 @@ private void handleShowTableStats() {
if (tableStats == null) {
resultSet = showTableStatsStmt.constructEmptyResultSet();
} else {
resultSet = showTableStatsStmt.constructResultSet(tableStats);
resultSet = showTableStatsStmt.constructResultSet(tableStats, tableIf);
}
return;
}
TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableIf.getId());
/*
tableStats == null means it's not analyzed, in this case show the estimated row count.
*/
if (tableStats == null) {
resultSet = showTableStatsStmt.constructResultSet(tableIf.getCachedRowCount());
} else {
resultSet = showTableStatsStmt.constructResultSet(tableStats);
}
resultSet = showTableStatsStmt.constructResultSet(tableStats, tableIf);
}

private void handleShowColumnStats() throws AnalysisException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException {
if (tableStats == null) {
return;
}
invalidateLocalStats(catalogId, dbId, tblId, cols, tableStats);
invalidateLocalStats(catalogId, dbId, tblId, dropStatsStmt.isAllColumns() ? null : cols, tableStats);
// Drop stats ddl is master only operation.
invalidateRemoteStats(catalogId, dbId, tblId, cols, dropStatsStmt.isAllColumns());
StatisticsRepository.dropStatisticsByColNames(catalogId, dbId, tblId, cols);
Expand All @@ -655,7 +655,7 @@ public void dropStats(TableIf table) throws DdlException {
long dbId = table.getDatabase().getId();
long tableId = table.getId();
Set<String> cols = table.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet());
invalidateLocalStats(catalogId, dbId, tableId, cols, tableStats);
invalidateLocalStats(catalogId, dbId, tableId, null, tableStats);
// Drop stats ddl is master only operation.
invalidateRemoteStats(catalogId, dbId, tableId, cols, true);
StatisticsRepository.dropStatisticsByColNames(catalogId, dbId, table.getId(), cols);
Expand Down Expand Up @@ -717,6 +717,8 @@ public void invalidateLocalStats(long catalogId, long dbId, long tableId,
// To remove stale column name that is changed before.
if (allColumn) {
tableStats.removeAllColumn();
tableStats.clearIndexesRowCount();
removeTableStats(tableId);
}
tableStats.updatedTime = 0;
tableStats.userInjected = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ protected void doSample() throws Exception {
List<Long> tabletIds = pair.first;
long totalRowCount = info.indexId == -1
? tbl.getRowCount()
: ((OlapTable) tbl).getRowCountForIndex(info.indexId);
: ((OlapTable) tbl).getRowCountForIndex(info.indexId, false);
double scaleFactor = (double) totalRowCount / (double) pair.second;
// might happen if row count in fe metadata hasn't been updated yet
if (Double.isInfinite(scaleFactor) || Double.isNaN(scaleFactor)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public class TableStatsMeta implements Writable, GsonPostProcessable {
public boolean userInjected;

@SerializedName("irc")
public ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();
private ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();

@VisibleForTesting
public TableStatsMeta() {
Expand Down Expand Up @@ -212,6 +212,10 @@ public long getRowCount(long indexId) {
return indexesRowCount.getOrDefault(indexId, -1L);
}

public void clearIndexesRowCount() {
indexesRowCount.clear();
}

private void clearStaleIndexRowCount(OlapTable table) {
Iterator<Long> iterator = indexesRowCount.keySet().iterator();
List<Long> indexIds = table.getIndexIds();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2102,6 +2102,9 @@ private boolean updateCatalogAfterVisible(TransactionState transactionState, Dat
}
}
replica.updateVersionWithFailed(newVersion, lastFailedVersion, lastSuccessVersion);
if (newVersion == Partition.PARTITION_INIT_VERSION + 1) {
index.setRowCountReported(false);
}
Set<Long> partitionIds = backendPartitions.get(replica.getBackendId());
if (partitionIds == null) {
partitionIds = Sets.newHashSet();
Expand Down
1 change: 1 addition & 0 deletions gensrc/thrift/BackendService.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct TTabletStat {
4: optional i64 total_version_count
5: optional i64 remote_data_size
6: optional i64 visible_version_count
7: optional i64 visible_version
}

struct TTabletStatResult {
Expand Down
2 changes: 1 addition & 1 deletion regression-test/suites/statistics/analyze_stats.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -2753,7 +2753,7 @@ PARTITION `p599` VALUES IN (599)
assertEquals("true", alter_result[0][7])
sql """drop stats alter_test"""
alter_result = sql """show table stats alter_test"""
assertEquals("false", alter_result[0][7])
assertEquals("", alter_result[0][7])
sql """alter table alter_test modify column id set stats ('row_count'='100', 'ndv'='0', 'num_nulls'='0.0', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');"""
alter_result = sql """show column stats alter_test(id)"""
logger.info("show column alter_test(id) stats: " + alter_result)
Expand Down
Loading

0 comments on commit 9cb425b

Please sign in to comment.