Adding diagnostics tools for ill conditioning #1299
Changes from all commits
@@ -15,7 +15,7 @@
 This module contains a collection of tools for diagnosing modeling issues.
 """

-__author__ = "Alexander Dowling, Douglas Allan, Andrew Lee"
+__author__ = "Alexander Dowling, Douglas Allan, Andrew Lee, Robby Parker, Ben Knueven"

 from operator import itemgetter
 import sys
@@ -36,8 +36,10 @@
     check_optimal_termination,
     ConcreteModel,
     Constraint,
+    Expression,
     Objective,
     Param,
+    RangeSet,
     Set,
     SolverFactory,
     value,
@@ -275,6 +277,14 @@
         description="If False, warnings will not be generated for things like log(x) with x >= 0",
     ),
 )
+CONFIG.declare(
+    "parallel_component_tolerance",
+    ConfigValue(
+        default=1e-4,
+        domain=float,
+        description="Tolerance for identifying near-parallel Jacobian rows/columns",
+    ),
+)


 SVDCONFIG = ConfigDict()
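The new option plugs into the existing toolbox CONFIG, so it can be supplied at construction time. A minimal usage sketch (assuming the DiagnosticsToolbox class defined in this module accepts CONFIG entries as keyword arguments, as is typical for ConfigDict-based IDAES classes; the model `m` here is hypothetical):

```python
from pyomo.environ import ConcreteModel, Constraint, Var
from idaes.core.util.model_diagnostics import DiagnosticsToolbox

m = ConcreteModel()
m.x = Var(initialize=1.0)
m.c = Constraint(expr=m.x >= 0)

# Tighten the near-parallel detection tolerance from its 1e-4 default
dt = DiagnosticsToolbox(m, parallel_component_tolerance=1e-8)
```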
@@ -914,6 +924,70 @@
             footer="=",
         )

+    def display_near_parallel_constraints(self, stream=None):
+        """
+        Display near-parallel (duplicate) constraints in model.
+
+        Args:
+            stream: I/O object to write report to (default = stdout)
+
+        Returns:
+            None
+
+        """
+        if stream is None:
+            stream = sys.stdout
+
+        parallel = [
+            f"{i[0].name}, {i[1].name}"
+            for i in check_parallel_jacobian(
+                model=self._model,
+                tolerance=self.config.parallel_component_tolerance,
+                direction="row",
+            )
+        ]
+
+        # Write the output
+        _write_report_section(
+            stream=stream,
+            lines_list=parallel,
+            title="The following pairs of constraints are nearly parallel:",
+            header="=",
+            footer="=",
+        )
+
+    def display_near_parallel_variables(self, stream=None):
+        """
+        Display near-parallel (duplicate) variables in model.
+
+        Args:
+            stream: I/O object to write report to (default = stdout)
+
+        Returns:
+            None
+
+        """
+        if stream is None:
+            stream = sys.stdout
+
+        parallel = [
+            f"{i[0].name}, {i[1].name}"
+            for i in check_parallel_jacobian(
+                model=self._model,
+                tolerance=self.config.parallel_component_tolerance,
+                direction="column",
+            )
+        ]
+
+        # Write the output
+        _write_report_section(
+            stream=stream,
+            lines_list=parallel,
+            title="The following pairs of variables are nearly parallel:",
+            header="=",
+            footer="=",
+        )
+
     # TODO: Block triangularization analysis
     # Number and size of blocks, polynomial degree of 1x1 blocks, simple pivot check of moderate sized sub-blocks?
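A usage sketch for the two new display methods (continuing the hypothetical toolbox `dt` from the example above): like the other display_* methods, they default to stdout but accept any writable stream.

```python
import io

# Print both near-parallel reports to stdout
dt.display_near_parallel_constraints()
dt.display_near_parallel_variables()

# Or capture a report as a string, e.g. for logging or tests
buf = io.StringIO()
dt.display_near_parallel_constraints(stream=buf)
report = buf.getvalue()
```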
@@ -1330,7 +1404,8 @@
             lines_list=next_steps,
             title="Suggested next steps:",
             line_if_empty=f"If you still have issues converging your model consider:\n"
-            f"{TAB*2}prepare_svd_toolbox()\n{TAB*2}prepare_degeneracy_hunter()",
+            f"{TAB*2}display_near_parallel_constraints()\n{TAB*2}display_near_parallel_variables()"
+            f"\n{TAB*2}prepare_degeneracy_hunter()\n{TAB*2}prepare_svd_toolbox()",
             footer="=",
         )
@@ -2086,6 +2161,9 @@
         def eq_degenerate(m_dh, v):
             # Find the columns with non-zero entries
             C = find(J[:, v])[0]
+            if len(C) == 0:
+                # Catch for edge-case of trivial constraint 0==0
+                return Constraint.Skip
             return sum(J[c, v] * m_dh.nu[c] for c in C) == 0

     else:
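For context on the fix above: returning Constraint.Skip from a rule tells Pyomo not to create a constraint at that index, which is how the empty-row edge case is dropped. A minimal standalone sketch of the pattern (hypothetical model, not from the source):

```python
from pyomo.environ import ConcreteModel, Constraint, RangeSet, Var

m = ConcreteModel()
m.s = RangeSet(1, 3)
m.x = Var(m.s)

def _rule(m, i):
    if i == 2:
        # Skip this index entirely rather than emit a trivial constraint
        return Constraint.Skip
    return m.x[i] >= i

m.c = Constraint(m.s, rule=_rule)
# m.c now contains entries for i = 1 and i = 3 only
```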
@@ -3051,6 +3129,238 @@
     )


+def check_parallel_jacobian(model, tolerance: float = 1e-4, direction: str = "row"):
+    """
+    Check for near-parallel rows or columns in the Jacobian.
+
+    Near-parallel rows or columns indicate a potential degeneracy in the model,
+    as this means that the associated constraints or variables are (near)
+    duplicates of each other.
+
+    This method is based on work published in:
+
+    Klotz, E., Identification, Assessment, and Correction of Ill-Conditioning and
+    Numerical Instability in Linear and Integer Programs, INFORMS, 2014, pp. 54-108
+    https://pubsonline.informs.org/doi/epdf/10.1287/educ.2014.0130
+
+    Args:
+        model: model to be analysed
+        tolerance: tolerance to use to determine if constraints/variables are parallel
+        direction: 'row' (default, constraints) or 'column' (variables)
+
+    Returns:
+        list of 2-tuples containing parallel Pyomo components
+
+    """
+    # Thanks to Robby Parker for the sparse matrix implementation and
+    # significant performance improvements
+
+    if direction not in ["row", "column"]:
+        raise ValueError(
+            f"Unrecognised value for direction ({direction}). "
+            "Must be 'row' or 'column'."
+        )
+
+    jac, nlp = get_jacobian(model, scaled=False)
+
+    # Get vectors that we will check, and the Pyomo components
+    # they correspond to.
+    if direction == "row":
+        components = nlp.get_pyomo_constraints()
+        csrjac = jac.tocsr()
+        # Make everything a column vector (CSC) for consistency
+        vectors = [csrjac[i, :].transpose().tocsc() for i in range(len(components))]
+    elif direction == "column":
+        components = nlp.get_pyomo_variables()
+        cscjac = jac.tocsc()
+        vectors = [cscjac[:, i] for i in range(len(components))]
+
+    # List to store pairs of parallel components
+    parallel = []
+
+    vectors_by_nz = {}
+    for vecidx, vec in enumerate(vectors):
+        maxval = max(np.abs(vec.data))
+        # Construct tuple of sorted col/row indices that participate
+        # in this vector (with non-negligible coefficient).
+        nz = tuple(
+            sorted(
+                idx
+                for idx, val in zip(vec.indices, vec.data)
+                if abs(val) > tolerance and abs(val) / maxval > tolerance
+            )
+        )
+        if nz in vectors_by_nz:
+            # Store the index as well so we know what component this
+            # corresponds to.
+            vectors_by_nz[nz].append((vec, vecidx))
+        else:
+            vectors_by_nz[nz] = [(vec, vecidx)]
+
+    for vecs in vectors_by_nz.values():
+        for idx, (u, uidx) in enumerate(vecs):
+            # idx is the "local index", uidx is the "global index"
+            # Frobenius norm of the matrix is 2-norm of this column vector
+            unorm = norm(u, ord="fro")
+            for v, vidx in vecs[idx + 1 :]:
+                vnorm = norm(v, ord="fro")
+
+                # Explicitly multiply a row vector * column vector
+                prod = u.transpose().dot(v)
+                absprod = abs(prod[0, 0])
+                diff = abs(absprod - unorm * vnorm)
+                if diff <= tolerance or diff <= tolerance * max(unorm, vnorm):
+                    parallel.append((uidx, vidx))
+
+    parallel = [(components[uidx], components[vidx]) for uidx, vidx in parallel]
+    return parallel
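A small end-to-end sketch of the new function (assuming this module is importable as idaes.core.util.model_diagnostics and that PyNumero's ASL extensions, used by get_jacobian, are available; the model and the placeholder objective are hypothetical):

```python
from pyomo.environ import ConcreteModel, Constraint, Objective, Var
from idaes.core.util.model_diagnostics import check_parallel_jacobian

m = ConcreteModel()
m.x = Var(initialize=1.0)
m.y = Var(initialize=1.0)
m.c1 = Constraint(expr=m.x + m.y == 2)
m.c2 = Constraint(expr=2 * m.x + 2 * m.y == 4)  # scalar multiple of c1
m.c3 = Constraint(expr=m.x - m.y == 0)  # same sparsity pattern, not parallel
m.obj = Objective(expr=m.x)  # placeholder; the NLP interface expects an objective

for u, v in check_parallel_jacobian(m, tolerance=1e-4, direction="row"):
    print(f"{u.name}, {v.name}")  # expect: c1, c2
```

Here c1 and c2 share the sparsity pattern (x, y) and satisfy |u.v| = ||u|| ||v||, so they are reported as a pair; c3 shares the pattern but fails the norm test.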
+def compute_ill_conditioning_certificate(
+    model,
+    target_feasibility_tol: float = 1e-06,
+    ratio_cutoff: float = 1e-04,
+    direction: str = "row",
+):
+    """
+    Finds constraints (rows) or variables (columns) in the model Jacobian that
+    may be contributing to ill conditioning.
+
+    This method is based on work published in:
+
+    Klotz, E., Identification, Assessment, and Correction of Ill-Conditioning and
+    Numerical Instability in Linear and Integer Programs, INFORMS, 2014, pp. 54-108
+    https://pubsonline.informs.org/doi/epdf/10.1287/educ.2014.0130
+
+    Args:
+        model: model to be analysed
+        target_feasibility_tol: target tolerance for solving ill conditioning problem
+        ratio_cutoff: cut-off for reporting ill conditioning
+        direction: 'row' (default, constraints) or 'column' (variables)
+
+    Returns:
+        list of strings reporting ill-conditioned variables/constraints and their
+        associated y values
+    """
+    _log.warning(
+        "Ill conditioning checks are a beta capability. Please be aware that "
+        "the name, location, and API for this may change in future releases."
+    )
+    # Thanks to B. Knueven for this implementation
+
+    if direction not in ["row", "column"]:
+        raise ValueError(
+            f"Unrecognised value for direction ({direction}). "
+            "Must be 'row' or 'column'."
+        )
+
+    jac, nlp = get_jacobian(model, scaled=False)
+
+    inverse_target_kappa = 1e-16 / target_feasibility_tol
+
+    # Set up the components we will analyze, either row or column
+    if direction == "row":
+        components = nlp.get_pyomo_constraints()
+        components_set = RangeSet(0, len(components) - 1)
+        results_set = RangeSet(0, nlp.n_primals() - 1)
+        jac = jac.transpose().tocsr()
+    elif direction == "column":
+        components = nlp.get_pyomo_variables()
+        components_set = RangeSet(0, len(components) - 1)
+        results_set = RangeSet(0, nlp.n_constraints() - 1)
+        jac = jac.tocsr()
+
+    # Build test problem
+    inf_prob = ConcreteModel()
+
+    inf_prob.y_pos = Var(components_set, bounds=(0, None))
+    inf_prob.y_neg = Var(components_set, bounds=(0, None))
+    inf_prob.y = Expression(components_set, rule=lambda m, i: m.y_pos[i] - m.y_neg[i])
+
+    inf_prob.res_pos = Var(results_set, bounds=(0, None))
+    inf_prob.res_neg = Var(results_set, bounds=(0, None))
+    inf_prob.res = Expression(
+        results_set, rule=lambda m, i: m.res_pos[i] - m.res_neg[i]
+    )
+
+    def b_rule(b, i):
+        lhs = 0.0
+
+        row = jac.getrow(i)
+        for j, val in zip(row.indices, row.data):
+            lhs += val * b.y[j]
+
+        return lhs == b.res[i]
+
+    inf_prob.by = Constraint(results_set, rule=b_rule)
+
+    # Normalization of y
+    inf_prob.normalize = Constraint(
+        expr=1 == sum(inf_prob.y_pos.values()) - sum(inf_prob.y_neg.values())
+    )
+
+    inf_prob.y_norm = Var()
+    inf_prob.y_norm_constr = Constraint(
+        expr=inf_prob.y_norm
+        == sum(inf_prob.y_pos.values()) + sum(inf_prob.y_neg.values())
+    )
+
+    inf_prob.res_norm = Var()
+    inf_prob.res_norm_constr = Constraint(
+        expr=inf_prob.res_norm
+        == sum(inf_prob.res_pos.values()) + sum(inf_prob.res_neg.values())
+    )
+
+    # Objective -- minimize residual
+    inf_prob.min_res = Objective(expr=inf_prob.res_norm)
+
+    solver = SolverFactory("cbc")  # TODO: Consider making this an option

[Review thread on the hard-coded "cbc" solver]
Comment: It would be nice to default to HiGHS or SCIP, although I understand if that can't happen for the time being.
Reply: The best solver to pick would probably be SoPlex, as we can set arbitrary precision and utilize its iterative refinement capability. But I went with
Reply: FYI, for testing at least we do have the AMPL version of SCIP available now. For DegeneracyHunter, I made the solver a user argument with SCIP as the default.

+
+    # tighten tolerances  # TODO: If solver is an option, need to allow user options
+    solver.options["primalT"] = target_feasibility_tol * 1e-1
+    solver.options["dualT"] = target_feasibility_tol * 1e-1
+
+    results = solver.solve(inf_prob, tee=False)

[Review thread on the solve step]
Comment: This does not have to happen now, but I generally like splitting these types of functions into two: one that simply builds the model, and one that builds the model (by calling the first function) and then solves it. That way an advanced user can build the model and inspect it, apply a transformation, or use a custom solver. It also makes it easier to see what the default solver and options are. I'm open to arguments that this is a bad idea, and again, this does not need to happen now.
Reply: We actually solve two problems at the moment -- we minimize the norm of the RHS, then we fix the norm of the RHS and minimize the 1-norm of the vector. That makes a cleaner workflow more difficult. I also imagine eventually trying to re-solve with tighter tolerances if the answer is nonsensical, e.g., the norm of the RHS is negative. These things can be done automatically much more straightforwardly if we pick a single solver.
Reply: I did not realize that we solve two problems. I see the difficulty in splitting this up in that case.

+    if not check_optimal_termination(results):
+        # TODO: maybe we should tighten tolerances first?
+        raise RuntimeError("Ill conditioning diagnostic problem infeasible")
+
+    result_norm = inf_prob.res_norm.value
+
+    if result_norm < 0.0:
+        # TODO: try again with tighter tolerances?
+        raise RuntimeError(
+            "Ill conditioning diagnostic problem has numerically troublesome solution"
+        )
+    if result_norm >= inverse_target_kappa:
+        return []
+
+    # find an equivalent solution which minimizes y_norm
+    inf_prob.min_res.deactivate()
+    inf_prob.res_norm.fix()
+
+    inf_prob.min_y = Objective(expr=inf_prob.y_norm)
+
+    # if this problem is numerically infeasible, we can still report something to the user
+    results = solver.solve(inf_prob, tee=False, load_solutions=False)
+    if check_optimal_termination(results):
+        inf_prob.solutions.load_from(results)
+
+    ill_cond = []
+    slist = sorted(
+        inf_prob.y, key=lambda dict_key: abs(value(inf_prob.y[dict_key])), reverse=True
+    )
+    cutoff = None
+    for i in slist:
+        if cutoff is None:
+            cutoff = abs(value(inf_prob.y[i])) * ratio_cutoff
+        val = value(inf_prob.y[i])
+        if abs(val) < cutoff:
+            break
+        ill_cond.append((components[i], val))
+
+    return ill_cond
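For orientation, this is my reading of the two-stage certificate problem the code above assembles for direction="row" (a sketch, not taken verbatim from the source; y is split as y = y_pos - y_neg with both parts nonnegative, and the residual res is split the same way):

```latex
% Stage 1: find a normalized combination of constraint gradients with
% minimal 1-norm residual
\min_{y}\ \lVert J^{\top} y \rVert_{1}
\quad \text{s.t.} \quad \sum_{i} y_{i} = 1

% Stage 2: fix the optimal residual norm r^{*} and minimize the 1-norm of y
\min_{y}\ \lVert y \rVert_{1}
\quad \text{s.t.} \quad \lVert J^{\top} y \rVert_{1} = r^{*},
\qquad \sum_{i} y_{i} = 1
```

A small r* with a normalized y certifies that the constraint gradients are nearly linearly dependent. The function then reports the components with the largest |y_i|, truncating the list once entries fall below ratio_cutoff times the largest value, and returns an empty list when r* >= 1e-16 / target_feasibility_tol.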
+
+
 # -------------------------------------------------------------------------------------------
 # Private module functions
 def _var_in_block(var, block):

[Review thread on the name inf_prob]
Comment: Why is this called inf_prob?
Comment: probably should just go with m?
Reply: I like m. I'm fine with another name too, I just don't know what "inf" is.
Reply: "Infeasibility" of the linear dependence constraint?