Skip to content

Commit

Permalink
[improvement](statistics)Return -1 to neredis if report olap table ro…
Browse files Browse the repository at this point in the history
…w count for new table is not done for all tablets. (apache#40457) (apache#40973)

backport: apache#40457
  • Loading branch information
Jibing-Li authored Sep 26, 2024
1 parent 8cdb27a commit 2f5be08
Show file tree
Hide file tree
Showing 14 changed files with 173 additions and 58 deletions.
1 change: 1 addition & 0 deletions be/src/olap/tablet_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,7 @@ Status TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>
t_tablet_stat.__set_remote_data_size(tablet_info.remote_data_size);
t_tablet_stat.__set_row_num(tablet_info.row_count);
t_tablet_stat.__set_version_count(tablet_info.version_count);
t_tablet_stat.__set_visible_version(tablet_info.version);
};
for_each_tablet(handler, filter_all_tablets);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ public class ShowTableStatsStmt extends ShowStmt {
new ImmutableList.Builder<String>()
.add("table_name")
.add("index_name")
.add("row_count")
.add("analyze_row_count")
.add("report_row_count")
.add("report_row_count_for_nereids")
.build();

private final TableName tableName;
Expand Down Expand Up @@ -166,37 +168,33 @@ public long getTableId() {
return tableId;
}

public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
public ShowResultSet constructResultSet(TableStatsMeta tableStatistic, TableIf table) {
if (indexName != null) {
return constructIndexResultSet(tableStatistic);
return constructIndexResultSet(tableStatistic, table);
}
return constructTableResultSet(tableStatistic);
return constructTableResultSet(tableStatistic, table);
}

public ShowResultSet constructEmptyResultSet() {
return new ShowResultSet(getMetaData(), new ArrayList<>());
}

public ShowResultSet constructResultSet(long rowCount) {
List<List<String>> result = Lists.newArrayList();
List<String> row = Lists.newArrayList();
row.add("");
row.add("");
row.add(String.valueOf(rowCount));
row.add("");
row.add("");
row.add("");
row.add("");
row.add("");
result.add(row);
return new ShowResultSet(getMetaData(), result);
}

public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic) {
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic, TableIf table) {
if (tableStatistic == null) {
return new ShowResultSet(getMetaData(), new ArrayList<>());
List<List<String>> result = Lists.newArrayList();
List<String> row = Lists.newArrayList();
row.add("");
row.add("");
row.add(String.valueOf(table.getRowCount()));
row.add("");
row.add("");
row.add("");
row.add("");
row.add("");
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
List<List<String>> result = Lists.newArrayList();
List<String> row = Lists.newArrayList();
row.add(String.valueOf(tableStatistic.updatedRows));
Expand All @@ -215,7 +213,7 @@ public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic) {
return new ShowResultSet(getMetaData(), result);
}

public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic, TableIf table) {
List<List<String>> result = Lists.newArrayList();
if (!(table instanceof OlapTable)) {
return new ShowResultSet(getMetaData(), result);
Expand All @@ -225,14 +223,13 @@ public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
if (indexId == null) {
throw new RuntimeException(String.format("Index %s not exist.", indexName));
}
long rowCount = tableStatistic.getRowCount(olapTable.getIndexIdByName(indexName));
if (rowCount == -1) {
return new ShowResultSet(getMetaData(), result);
}
long rowCount = tableStatistic == null ? -1 : tableStatistic.getRowCount(olapTable.getIndexIdByName(indexName));
List<String> row = Lists.newArrayList();
row.add(table.getName());
row.add(indexName);
row.add(String.valueOf(rowCount));
row.add(String.valueOf(olapTable.getRowCountForIndex(indexId, false)));
row.add(String.valueOf(olapTable.getRowCountForIndex(indexId, true)));
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ public enum IndexExtState {
@SerializedName(value = "rollupFinishedVersion")
private long rollupFinishedVersion;

private boolean rowCountReported = false;

public MaterializedIndex() {
this.state = IndexState.NORMAL;
this.idToTablets = new HashMap<>();
Expand Down Expand Up @@ -206,6 +208,14 @@ public int getTabletOrderIdx(long tabletId) {
return -1;
}

public void setRowCountReported(boolean reported) {
this.rowCountReported = reported;
}

public boolean getRowCountReported() {
return this.rowCountReported;
}

@Override
public void write(DataOutput out) throws IOException {
super.write(out);
Expand All @@ -225,6 +235,7 @@ public void write(DataOutput out) throws IOException {
out.writeLong(rollupFinishedVersion);
}

@Deprecated
public void readFields(DataInput in) throws IOException {
super.readFields(in);

Expand Down
13 changes: 6 additions & 7 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -1274,18 +1274,17 @@ public Map<String, Set<String>> findReAnalyzeNeededPartitions() {

@Override
public long fetchRowCount() {
long rowCount = 0;
for (Map.Entry<Long, Partition> entry : idToPartition.entrySet()) {
rowCount += entry.getValue().getBaseIndex().getRowCount();
}
return rowCount;
return getRowCountForIndex(baseIndexId, false);
}

public long getRowCountForIndex(long indexId) {
public long getRowCountForIndex(long indexId, boolean strict) {
long rowCount = 0;
for (Map.Entry<Long, Partition> entry : idToPartition.entrySet()) {
MaterializedIndex index = entry.getValue().getIndex(indexId);
rowCount += index == null ? 0 : index.getRowCount();
if (strict && !index.getRowCountReported()) {
return -1;
}
rowCount += (index == null || index.getRowCount() == -1) ? 0 : index.getRowCount();
}
return rowCount;
}
Expand Down
10 changes: 10 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Replica.java
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ public enum ReplicaStatus {

private long userDropTime = -1;

private long lastReportVersion = 0;

public Replica() {
}

Expand Down Expand Up @@ -731,4 +733,12 @@ public boolean isScheduleAvailable() {
return Env.getCurrentSystemInfo().checkBackendScheduleAvailable(backendId)
&& !isUserDrop();
}

public void setLastReportVersion(long version) {
this.lastReportVersion = version;
}

public long getLastReportVersion() {
return lastReportVersion;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,41 @@ protected void runAfterCatalogReady() {
long version = partition.getVisibleVersion();
for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
long indexRowCount = 0L;
boolean indexReported = true;
for (Tablet tablet : index.getTablets()) {
long tabletRowCount = 0L;
boolean tabletReported = false;
for (Replica replica : tablet.getReplicas()) {
LOG.debug("Table {} replica {} current version {}, report version {}",
olapTable.getName(), replica.getId(),
replica.getVersion(), replica.getLastReportVersion());
if (replica.checkVersionCatchUp(version, false)
&& replica.getRowCount() > tabletRowCount) {
&& replica.getRowCount() >= tabletRowCount) {
// 1. If replica version and reported replica version are all equal to
// PARTITION_INIT_VERSION, set tabletReported to true, which indicates this
// tablet is empty for sure when previous report.
// 2. If last report version is larger than PARTITION_INIT_VERSION, set
// tabletReported to true as well. That is, we only guarantee all replicas of
// the tablet are reported for the init version.
// e.g. When replica version is 2, but last reported version is 1,
// tabletReported would be false.
if (replica.getVersion() == Partition.PARTITION_INIT_VERSION
&& replica.getLastReportVersion() == Partition.PARTITION_INIT_VERSION
|| replica.getLastReportVersion() > Partition.PARTITION_INIT_VERSION) {
tabletReported = true;
}
tabletRowCount = replica.getRowCount();
}
}
indexRowCount += tabletRowCount;
// Only when all tablets of this index are reported, we set indexReported to true.
indexReported = indexReported && tabletReported;
} // end for tablets
index.setRowCountReported(indexReported);
index.setRowCount(indexRowCount);
LOG.debug("Table {} index {} all tablets reported[{}], row count {}",
olapTable.getName(), olapTable.getIndexNameById(index.getId()),
indexReported, indexRowCount);
} // end for indices
} // end for partitions
LOG.debug("finished to set row num for table: {} in database: {}",
Expand All @@ -148,6 +172,9 @@ private void updateTabletStat(Long beId, TTabletStatResult result) {
replica.setRemoteDataSize(stat.getRemoteDataSize());
replica.setRowCount(stat.getRowNum());
replica.setVersionCount(stat.getVersionCount());
// Older version BE doesn't set visible version. Set it to max for compatibility.
replica.setLastReportVersion(stat.isSetVisibleVersion() ? stat.getVisibleVersion()
: Long.MAX_VALUE);
}
}
}
Expand Down
13 changes: 2 additions & 11 deletions fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
Original file line number Diff line number Diff line change
Expand Up @@ -2489,21 +2489,12 @@ private void handleShowTableStats() {
if (tableStats == null) {
resultSet = showTableStatsStmt.constructEmptyResultSet();
} else {
resultSet = showTableStatsStmt.constructResultSet(tableStats);
resultSet = showTableStatsStmt.constructResultSet(tableStats, tableIf);
}
return;
}
TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableIf.getId());
/*
HMSExternalTable table will fetch row count from HMS
or estimate with file size and schema if it's not analyzed.
tableStats == null means it's not analyzed, in this case show the estimated row count.
*/
if (tableStats == null) {
resultSet = showTableStatsStmt.constructResultSet(tableIf.getRowCount());
} else {
resultSet = showTableStatsStmt.constructResultSet(tableStats);
}
resultSet = showTableStatsStmt.constructResultSet(tableStats, tableIf);
}

private void handleShowColumnStats() throws AnalysisException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException {
if (tableStats == null) {
return;
}
invalidateLocalStats(catalogId, dbId, tblId, cols, tableStats);
invalidateLocalStats(catalogId, dbId, tblId, dropStatsStmt.isAllColumns() ? null : cols, tableStats);
// Drop stats ddl is master only operation.
invalidateRemoteStats(catalogId, dbId, tblId, cols, dropStatsStmt.isAllColumns());
StatisticsRepository.dropStatistics(tblId, cols);
Expand All @@ -714,7 +714,7 @@ public void dropStats(TableIf table) throws DdlException {
long dbId = table.getDatabase().getId();
long tableId = table.getId();
Set<String> cols = table.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet());
invalidateLocalStats(catalogId, dbId, tableId, cols, tableStats);
invalidateLocalStats(catalogId, dbId, tableId, null, tableStats);
// Drop stats ddl is master only operation.
invalidateRemoteStats(catalogId, dbId, tableId, cols, true);
StatisticsRepository.dropStatistics(table.getId(), cols);
Expand Down Expand Up @@ -767,6 +767,8 @@ public void invalidateLocalStats(long catalogId, long dbId, long tableId,
// To remove stale column name that is changed before.
if (allColumn) {
tableStats.removeAllColumn();
tableStats.clearIndexesRowCount();
removeTableStats(tableId);
}
tableStats.updatedTime = 0;
tableStats.userInjected = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ protected void doSample() throws Exception {
List<Long> tabletIds = pair.first;
long totalRowCount = info.indexId == -1
? tbl.getRowCount()
: ((OlapTable) tbl).getRowCountForIndex(info.indexId);
: ((OlapTable) tbl).getRowCountForIndex(info.indexId, false);
double scaleFactor = (double) totalRowCount / (double) pair.second;
// might happen if row count in fe metadata hasn't been updated yet
if (Double.isInfinite(scaleFactor) || Double.isNaN(scaleFactor)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public class TableStatsMeta implements Writable, GsonPostProcessable {
public boolean userInjected;

@SerializedName("irc")
public ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();
private ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();

@VisibleForTesting
public TableStatsMeta() {
Expand Down Expand Up @@ -225,6 +225,10 @@ public long getRowCount(long indexId) {
return indexesRowCount.getOrDefault(indexId, -1L);
}

public void clearIndexesRowCount() {
indexesRowCount.clear();
}

private void clearStaleIndexRowCount(OlapTable table) {
Iterator<Long> iterator = indexesRowCount.keySet().iterator();
List<Long> indexIds = table.getIndexIds();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1952,6 +1952,9 @@ private boolean updateCatalogAfterVisible(TransactionState transactionState, Dat
}
}
replica.updateVersionWithFailed(newVersion, lastFailedVersion, lastSuccessVersion);
if (newVersion == Partition.PARTITION_INIT_VERSION + 1) {
index.setRowCountReported(false);
}
}
}
} // end for indices
Expand Down
1 change: 1 addition & 0 deletions gensrc/thrift/BackendService.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ struct TTabletStat {
3: optional i64 row_num
4: optional i64 version_count
5: optional i64 remote_data_size
6: optional i64 visible_version
}

struct TTabletStatResult {
Expand Down
2 changes: 1 addition & 1 deletion regression-test/suites/statistics/analyze_stats.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -2701,7 +2701,7 @@ PARTITION `p599` VALUES IN (599)
assertEquals("true", alter_result[0][7])
sql """drop stats alter_test"""
alter_result = sql """show table stats alter_test"""
assertEquals("false", alter_result[0][7])
assertEquals("", alter_result[0][7])
sql """alter table alter_test modify column id set stats ('row_count'='100', 'ndv'='0', 'num_nulls'='0.0', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');"""
alter_result = sql """show column stats alter_test(id)"""
assertEquals(1, alter_result.size())
Expand Down
Loading

0 comments on commit 2f5be08

Please sign in to comment.