-
Notifications
You must be signed in to change notification settings - Fork 0
teste2 ref
Pensando que a relação entre perímetro da forma e sua "diagonal" é da ordem de 3 ou mais, a query abaixo permite avaliar qual o fator que melhor aproxima a raiz da área com o fator do perímetro:
SELECT count(*) n FROM optim.jurisdiction_geom where isolabel_ext like '%-%-%'; -- 8781
-- Find perim3_km factor that minimizes d_median, d_avg and d_dev; and avoid negative d_min or extreme d_max
SELECT round(PERCENTILE_DISC(0.5) within group (order by abs(d)) ) as d_median,
round(avg(abs(d))) as d_avg, round(stddev_pop(abs(d))) as d_dev,
max(abs(d)) as d_max, min(abs(d)) as d_min, min(d) as signed_min,
count(*) filter (where d<0) as signed_n
FROM (
SELECT 200.0*(perim3_km-km2_to_km)/(perim3_km+km2_to_km) AS d -- percentual difference
FROM (
select *, round(sqrt(km2),2)::float as km2_to_km -- query for visual check
from (
select osm_id, isolabel_ext, st_area(geom,true)/1000000.0 as km2,
st_perimeter(geom,true)/3500.0 as perim3_km -- tested: 3000, 3500, 4000 (ok), 4500 (generated negative d_min and more d_dev)
from optim.jurisdiction_geom where isolabel_ext like '%-%-%'
) t1
) t2
)t3; -- AVG of tested permeters: 66.3, 52.4, 28.6
Parece então razoável, depois dessa aproximação de ordem de grandeza, fazer a média entre os dois, quando se tratando de geometrias de jurisdições:
diam_ref = (SQRT(area)+perimetro/3.5)/2.0
Resultados, usando 8781 geometrias de municípios de optim.jurisdiction_geom
. Valores das diferenças percentuais execeto pelo fator:
perim_factor | d_median | d_avg | d_dev | d_max | d_min | signed_min |
---|---|---|---|---|---|---|
3.0 | 65 | 66 | 17 | 167.80 | 16.66 | 16.66 |
3.5 | 51 | 52 | 18 | 162.93 | 1.32 | 1.32 |
4.0 | 38 | 40 | 19 | 158.19 | 0.0294 | -12.05 |
4.5 | 27 | 29 | 18 | 153.57 | 0.0101 | -23.70 |
A quantidade de negativos foi alta, 439, no fator 4.5. Como são esperadas mais geometrias e o é desvio alto, o ajuste é grosseiro, quantizado em 0.5.
A simplificação é realizada em duas etapas, a primeira mais forte com ST_SimplifyPreserveTopology e a segunda "grudando dobras ilhas na escala menor", através de ST_Buffer(+ depois -), depois ST_SimplifyVW para remover redundâncias dentro da mesma escala.
Nota: segundo referências 1 e 2 a suavisação por buffer não seria necessária quando usando algorítmo de Visvalingam–Whyatt. Sugere-se a realização de testes e comparações antes de eliminar o buffer.
DROP FUNCTION ST_viz_simplify_noise;
DROP FUNCTION ST_viz_simplify_noisebefore;
DROP FUNCTION ST_viz_simplify_noiseafter;
CREATE or replace FUNCTION ST_viz_simplify_noise( -- simplify by buffer-eroding
p_geom geometry, -- input polygon
p_radius float, -- buffer radius
p_VWfactor float DEFAULT 0.1 -- reduce buffer radius by a factor, to use as tolerance in ST_SimplifyVW
) RETURNS geometry AS $f$
SELECT ST_SimplifyVW(
ST_Buffer(ST_Buffer(p_geom,p_radius),-p_radius),
p_radius*p_VWfactor
)
$f$ LANGUAGE SQL IMMUTABLE;
CREATE or replace FUNCTION ST_viz_simplify_noisebefore( -- simplify for VIsualiZation
p_geom geometry, -- input polygon
p_both boolean DEFAULT true, -- 3state flag: NULL= only noise, true = both, false= only main
p_factor float DEFAULT 0.1, -- 1 = no reduction, 0.5 is 50% to tolerance control (100% default)
p_noiseFactor float DEFAULT 0.01 -- second factor, to smooth "noise", can be 100%, 10% or 1%.
) RETURNS geometry AS $f$
WITH diam_ref AS (
SELECT *, d*fract AS tolerance
-- area metric of the SRID, not meters!
FROM ( SELECT ( SQRT(ST_Area(p_geom)) + ST_Perimeter(p_geom)/3.5 )/2.0 AS d,
p_factor*0.1 AS fract -- fraction metric
) t0
)
SELECT CASE WHEN p_both IS NOT NULL THEN ST_SimplifyPreserveTopology(geom,tolerance) ELSE geom END
FROM (
SELECT CASE -- noise reducer:
WHEN p_both IS NULL OR p_both THEN ST_viz_simplify_noise(p_geom, tolerance*p_noiseFactor, 0.1)
ELSE p_geom -- no noise reduction
END as geom,
tolerance
FROM diam_ref
) t1
$f$ LANGUAGE SQL IMMUTABLE;
CREATE or replace FUNCTION ST_viz_simplify_noiseafter( -- simplify for VIsualiZation
p_geom geometry, -- input polygon
p_both boolean DEFAULT true, -- 3state flag: NULL= only noise, true = both, false= only main
p_factor float DEFAULT 0.1, -- 1 = no reduction, 0.5 is 50% to tolerance control (100% default)
p_noiseFactor float DEFAULT 0.01 -- second factor, to smooth "noise", can be 100%, 10% or 1%.
) RETURNS geometry AS $f$
WITH diam_ref AS (
SELECT *, d*fract AS tolerance
-- area metric of the SRID, not meters!
FROM ( SELECT ( SQRT(ST_Area(p_geom)) + ST_Perimeter(p_geom)/3.5 )/2.0 AS d,
p_factor*0.1 AS fract -- fraction metric
) t0
)
SELECT CASE -- noise reducer:
WHEN p_both IS NULL OR p_both THEN ST_viz_simplify_noise(geom, tolerance*p_noiseFactor, 0.1)
ELSE geom -- no noise reduction
END
FROM (
SELECT CASE WHEN p_both IS NOT NULL THEN ST_SimplifyPreserveTopology(p_geom,tolerance) ELSE p_geom END as geom,
tolerance
FROM diam_ref
) t1
$f$ LANGUAGE SQL IMMUTABLE;
---------------------------------
CREATE TABLE lix AS
SELECT * FROM optim.jurisdiction_geom TABLESAMPLE BERNOULLI(0.8) where isolabel_ext like '%-%-%'
;
SELECT type, PERCENTILE_DISC(0.5) within group (order by abs(dif)) AS dif_absmdn,
round(AVG(abs(dif)),4) as dif_absavg, min(abs(dif)) as dif_absmin, Max(abs(dif)) as dif_absmax,
min(dif) as dif_min, Max(dif) as dif_max, count(*) as n
FROM (
select type, round(2*(km2-simple_km2)/(km2+simple_km2), 4) as dif
from (
SELECT isolabel_ext, round(st_area(geom,true)/1000000.0,2) as km2,
round(st_area(ST_viz_simplify_noisebefore(geom,true,1),true)/1000000.0,2) as simple_km2,
'before' AS type
FROM lix
UNION ALL
SELECT isolabel_ext, round(st_area(geom,true)/1000000.0,2) as km2,
round(st_area(ST_viz_simplify_noiseafter(geom,true,1),true)/1000000.0,2) as simple_km2,
'after' AS type
FROM lix
UNION ALL
SELECT isolabel_ext, round(st_area(geom,true)/1000000.0,2) as km2,
round(st_area(ST_viz_simplify_noiseafter(geom,false,1),true)/1000000.0,2) as simple_km2,
'main' AS type
FROM lix
UNION ALL
SELECT isolabel_ext, round(st_area(geom,true)/1000000.0,2) as km2,
round(st_area(ST_viz_simplify_noiseafter(geom,NULL,1),true)/1000000.0,2) as simple_km2,
'noise' AS type
FROM lix
) t
) t2 GROUP BY type ORDER BY type;
With p_factor=0.1, the default:
type | dif_absmdn | dif_absavg | dif_absmin | dif_absmax | dif_min | dif_max | n |
---|---|---|---|---|---|---|---|
after | 0.0015 | 0.0022 | 0.0000 | 0.0153 | -0.0080 | 0.0153 | 62 |
before | 0.0016 | 0.0023 | 0.0000 | 0.0139 | -0.0095 | 0.0139 | 62 |
main | 0.0016 | 0.0022 | 0.0000 | 0.0154 | -0.0079 | 0.0154 | 62 |
noise | 0.0001 | 0.0002 | 0.0000 | 0.0011 | -0.0011 | 0.0007 | 62 |
With p_factor=1, the default:
type | dif_absmdn | dif_absavg | dif_absmin | dif_absmax | dif_min | dif_max | n |
---|---|---|---|---|---|---|---|
after | 0.0389 | 0.0497 | 0.0003 | 0.1338 | -0.1338 | 0.1304 | 62 |
before | 0.0370 | 0.0513 | 0.0009 | 0.1701 | -0.1323 | 0.1701 | 62 |
main | 0.0387 | 0.0497 | 0.0000 | 0.1335 | -0.1335 | 0.1305 | 62 |
noise | 0.0015 | 0.0039 | 0.0000 | 0.0645 | -0.0187 | 0.0645 | 62 |
Conclusion, the best is with noise reducer AFTER simplify and p_percent=1. Something more as :
-- listing a sample of tests:
SELECT * , round( 2*(km2-simple_km2)/(km2+simple_km2) ,4) AS dif FROM (
SELECT isolabel_ext, round(st_area(geom,true)/1000000.0,2) as km2,
round(st_area(ST_viz_simplify_noiseafter(geom,true,0.9),true)/1000000.0,2) as simple_km2
FROM lix
) t ORDER BY ABS( 2*(km2-simple_km2)/(km2+simple_km2) ) DESC, 1;
isolabel_ext | km2 | simple_km2 | dif |
---|---|---|---|
UY-CL-LaPedrera | 0.16 | 0.12 | 0.2857 |
UY-RV-Masoller | 0.29 | 0.23 | 0.2308 |
UY-CO-LosPinos | 0.86 | 0.79 | 0.0848 |
UY-MA-LaBarra | 3.13 | 3.35 | -0.0679 |
BR-MS-Maracaju | 5298.39 | 5618.21 | -0.0586 |
BR-GO-Edealina | 604.46 | 635.92 | -0.0507 |
BR-PR-Jacarezinho | 602.61 | 623.98 | -0.0348 |
CO-CUN-Villagomez | 64.74 | 62.65 | 0.0328 |
CO-VAC-SanPedro | 199.63 | 205.40 | -0.0285 |
BR-RS-Sobradinho | 130.57 | 127.51 | 0.0237 |
BR-PR-SantaIzabelOeste | 319.82 | 327.25 | -0.0230 |
BR-GO-Indiara | 961.18 | 983.08 | -0.0225 |
... | ... | ... | .. |
BR-PA-Chaves | 13081.58 | 13091.43 | -0.0008 |
BR-MA-BuritiBravo | 1580.20 | 1580.98 | -0.0005 |
BR-MG-SaoRoqueMinas | 2095.21 | 2094.22 | 0.0005 |
BR-MA-Brejo | 1072.57 | 1072.84 | -0.0003 |
BR-PA-CachoeiraArari | 3111.57 | 3111.31 | 0.0001 |
Algumas heurísticas para resolver os raros casos de distorção de área:
- aplicar apenas a redução de ruído,
ST_viz_simplify_noise()
, quando a geometria em si já possui poucos pontos, como no caso de Masoller. - usar fator mais baixo quando o "fator de alongamento" for alto (
jurisdiction.info->'elongation_factor'>=4
), novamente como no caso de Masoller.
Avaliando complexidade pelo numero de pontos
select ptsmin, round(avg(spp)) as spp_avg, round(stddev(spp)) as spp_dev from (
select *, size/n_points as spp FROM (
SELECT ptsmin, round(sqrt(st_area(geom,true))) as size, ST_NPoints(geom) n_points
FROM optim.jurisdiction_geom, (select 1000 as ptsmin union all select 5000 union all select 10000 ) t0
WHERE isolabel_ext like '%-%-%'
) t1
where n_points<=ptsmin -- > or <=
) t2 group by ptsmin;
n_points | spp_avg | spp_dev |
---|---|---|
<1000 | 98 | 144 |
<5000 | 71 | 122 |
<10000 | 69 | 121 |
>1000 | 19 | 16 < 19 |
>5000 | 10 | 10 |
>10000 | 6 | 5 < 6 |
Usando o critério n_points>1000
temos uma chance razoável de medir a complexidade por "size per point", spp
. Como a média é ~20 metros, podemos supor que 10 metros seja muito baixo, justificando uma simplificação maior por ST_SimplifyPreserveTopology.
Colocando as heuristicas em uma só função: ...
Lembretes
- revisar e levar para a documentação, removendo da Wiki em seguida
- https://github.com/AddressForAll/site-v2/issues/44