Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[improvement](statistics)Analyze all columns when partition first loaded. #38601

Merged
merged 2 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public class AnalyzeProperties {
public static final String PROPERTY_PERIOD_SECONDS = "period.seconds";
public static final String PROPERTY_FORCE_FULL = "force.full";
public static final String PROPERTY_PARTITION_COLUMN_FROM_SQL = "partition.column.from.sql";
public static final String PROPERTY_USE_AUTO_ANALYZER = "use.auto.analyzer";

public static final AnalyzeProperties DEFAULT_PROP = new AnalyzeProperties(new HashMap<String, String>() {
{
Expand Down Expand Up @@ -72,6 +73,7 @@ public class AnalyzeProperties {
.add(PROPERTY_PERIOD_CRON)
.add(PROPERTY_FORCE_FULL)
.add(PROPERTY_PARTITION_COLUMN_FROM_SQL)
.add(PROPERTY_USE_AUTO_ANALYZER)
.build();

public AnalyzeProperties(Map<String, String> properties) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1197,6 +1197,10 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
.collect(Collectors.toSet()))) {
return true;
}
// Check new partition first loaded.
if (tblStats.newPartitionLoaded != null && tblStats.newPartitionLoaded.get()) {
return true;
}
// 1 Check row count.
long currentRowCount = getRowCount();
long lastAnalyzeRowCount = tblStats.rowCount;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,21 @@ public List<AnalysisInfo> buildAnalysisInfosForDB(DatabaseIf<TableIf> db, Analyz
}

// Each analyze stmt corresponding to an analysis job.
public void createAnalysisJob(AnalyzeTblStmt stmt, boolean proxy) throws DdlException {
public void createAnalysisJob(AnalyzeTblStmt stmt, boolean proxy) throws DdlException, AnalysisException {
// Using auto analyzer if user specifies.
if (stmt.getAnalyzeProperties().getProperties().containsKey("use.auto.analyzer")) {
StatisticsAutoCollector autoCollector = Env.getCurrentEnv().getStatisticsAutoCollector();
if (autoCollector.skip(stmt.getTable())) {
return;
}
List<AnalysisInfo> jobs = new ArrayList<>();
autoCollector.createAnalyzeJobForTbl(stmt.getDb(), jobs, stmt.getTable());
AnalysisInfo job = autoCollector.getReAnalyzeRequiredPart(jobs.get(0));
if (job != null) {
Env.getCurrentEnv().getStatisticsAutoCollector().createSystemAnalysisJob(job);
}
return;
}
AnalysisInfo jobInfo = buildAndAssignJob(stmt);
if (jobInfo == null) {
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,12 @@
import org.apache.doris.statistics.AnalysisInfo.ScheduleType;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.collect.Sets;
import org.apache.hudi.common.util.VisibleForTesting;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.time.LocalTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -224,19 +222,6 @@ protected AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) {
String colNames = jobInfo.colName;
if (table.needReAnalyzeTable(tblStats)) {
needRunPartitions = table.findReAnalyzeNeededPartitions();
} else if (table instanceof OlapTable && tblStats.newPartitionLoaded.get()) {
OlapTable olapTable = (OlapTable) table;
needRunPartitions = new HashMap<>();
Set<String> partitionColumnNames = olapTable.getPartitionInfo().getPartitionColumns().stream()
.map(Column::getName).collect(Collectors.toSet());
colNames = partitionColumnNames.stream().collect(Collectors.joining(","));
Set<String> partitions = Sets.newHashSet();
// No need to filter unchanged partitions, because it may bring unexpected behavior.
// Use dummy partition to skip it.
partitions.add("Dummy Partition");
for (String column : partitionColumnNames) {
needRunPartitions.put(column, partitions);
}
}

if (needRunPartitions == null || needRunPartitions.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@

import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.persist.gson.GsonPostProcessable;
import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
import org.apache.doris.statistics.AnalysisInfo.JobType;
import org.apache.doris.statistics.util.StatisticsUtil;

Expand Down Expand Up @@ -166,21 +166,16 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) {
indexesRowCount.putAll(analyzedJob.indexesRowCount);
clearStaleIndexRowCount((OlapTable) tableIf);
}
if (!analyzedJob.emptyJob && analyzedJob.colToPartitions.keySet()
if (analyzedJob.emptyJob && AnalysisMethod.SAMPLE.equals(analyzedJob.analysisMethod)) {
return;
}
if (analyzedJob.colToPartitions.keySet()
.containsAll(tableIf.getBaseSchema().stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName).collect(Collectors.toSet()))) {
updatedRows.set(0);
newPartitionLoaded.set(false);
}
if (tableIf instanceof OlapTable) {
PartitionInfo partitionInfo = ((OlapTable) tableIf).getPartitionInfo();
if (partitionInfo != null && analyzedJob.colToPartitions.keySet()
.containsAll(partitionInfo.getPartitionColumns().stream()
.map(Column::getName).collect(Collectors.toSet()))) {
newPartitionLoaded.set(false);
}
}
}
}

Expand All @@ -189,6 +184,9 @@ public void gsonPostProcess() throws IOException {
if (indexesRowCount == null) {
indexesRowCount = new ConcurrentHashMap<>();
}
if (newPartitionLoaded == null) {
newPartitionLoaded = new AtomicBoolean(false);
}
}

public long getRowCount(long indexId) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,12 +296,18 @@ public List<Column> getColumns() {

};
OlapTable olapTable = new OlapTable();
TableStatsMeta stats0 = new TableStatsMeta(
50, new AnalysisInfoBuilder().setColToPartitions(new HashMap<>())
.setColName("col1").setRowCount(100).build(), olapTable);
stats0.newPartitionLoaded.set(true);
Assertions.assertTrue(olapTable.needReAnalyzeTable(stats0));

TableStatsMeta stats1 = new TableStatsMeta(
50, new AnalysisInfoBuilder().setColToPartitions(new HashMap<>())
.setColName("col1").setRowCount(100).build(), olapTable);
stats1.updatedRows.addAndGet(70);

Assertions.assertTrue(olapTable.needReAnalyzeTable(stats1));

TableStatsMeta stats2 = new TableStatsMeta(
190, new AnalysisInfoBuilder()
.setColToPartitions(new HashMap<>()).setColName("col1").setRowCount(200).build(), olapTable);
Expand Down
Loading
Loading