Skip to content

teste2 ref

Peter edited this page Oct 2, 2022 · 2 revisions

Diâmetro de referência

Pensando que a relação entre perímetro da forma e sua "diagonal" é da ordem de 3 ou mais, a query abaixo permite avaliar qual o fator que melhor aproxima a raiz da área com o fator do perímetro:

SELECT count(*) n FROM optim.jurisdiction_geom where isolabel_ext like '%-%-%'; -- 8781

-- Find perim3_km factor that minimizes d_median, d_avg and d_dev; and avoid negative d_min or extreme d_max
SELECT round(PERCENTILE_DISC(0.5) within group (order by abs(d)) ) as d_median,
       round(avg(abs(d))) as d_avg, round(stddev_pop(abs(d))) as d_dev,
       max(abs(d)) as d_max, min(abs(d)) as d_min, min(d) as signed_min,
       count(*) filter (where d<0) as signed_n
FROM (
 SELECT 200.0*(perim3_km-km2_to_km)/(perim3_km+km2_to_km) AS d -- percentual difference
 FROM (
   select *, round(sqrt(km2),2)::float as km2_to_km  -- query for visual check
   from (
     select osm_id, isolabel_ext,  st_area(geom,true)/1000000.0 as km2,
            st_perimeter(geom,true)/3500.0 as perim3_km  -- tested:  3000, 3500, 4000 (ok), 4500 (generated negative d_min and more d_dev)
     from optim.jurisdiction_geom where isolabel_ext like '%-%-%'
   ) t1
 ) t2
)t3;  -- AVG of tested permeters: 66.3, 52.4, 28.6

Parece então razoável, depois dessa aproximação de ordem de grandeza, fazer a média entre os dois, quando se tratando de geometrias de jurisdições:
diam_ref = (SQRT(area)+perimetro/3.5)/2.0

Resultados, usando 8781 geometrias de municípios de optim.jurisdiction_geom. Valores das diferenças percentuais execeto pelo fator:

perim_factor d_median d_avg d_dev d_max d_min signed_min
3.0 65 66 17 167.80 16.66 16.66
3.5 51 52 18 162.93 1.32 1.32
4.0 38 40 19 158.19 0.0294 -12.05
4.5 27 29 18 153.57 0.0101 -23.70

A quantidade de negativos foi alta, 439, no fator 4.5. Como são esperadas mais geometrias e o é desvio alto, o ajuste é grosseiro, quantizado em 0.5.

Simplificação da geometria para uso em interface SVG

A simplificação é realizada em duas etapas, a primeira mais forte com ST_SimplifyPreserveTopology e a segunda "grudando dobras ilhas na escala menor", através de ST_Buffer(+ depois -), depois ST_SimplifyVW para remover redundâncias dentro da mesma escala.

Nota: segundo referências 1 e 2 a suavisação por buffer não seria necessária quando usando algorítmo de Visvalingam–Whyatt. Sugere-se a realização de testes e comparações antes de eliminar o buffer.

DROP FUNCTION ST_viz_simplify_noise;
DROP FUNCTION ST_viz_simplify_noisebefore;
DROP FUNCTION ST_viz_simplify_noiseafter;

CREATE or replace FUNCTION ST_viz_simplify_noise(  -- simplify by buffer-eroding 
    p_geom      geometry,             -- input polygon
    p_radius    float,                -- buffer radius
    p_VWfactor float   DEFAULT 0.1    -- reduce buffer radius by a factor, to use as tolerance in ST_SimplifyVW
) RETURNS geometry AS $f$
   SELECT ST_SimplifyVW(
      ST_Buffer(ST_Buffer(p_geom,p_radius),-p_radius),
      p_radius*p_VWfactor
   )
$f$ LANGUAGE SQL IMMUTABLE;

CREATE or replace FUNCTION ST_viz_simplify_noisebefore(  -- simplify for VIsualiZation 
    p_geom      geometry,               -- input polygon
    p_both        boolean DEFAULT true,   -- 3state flag: NULL= only noise, true = both, false= only main
    p_factor      float   DEFAULT 0.1,       -- 1 = no reduction, 0.5 is 50% to tolerance control (100% default)
    p_noiseFactor float   DEFAULT 0.01    -- second factor, to smooth "noise", can be 100%, 10% or 1%.
) RETURNS geometry AS $f$
  WITH   diam_ref AS (
    SELECT *, d*fract AS tolerance 
    -- area metric of the SRID, not meters!
    FROM ( SELECT ( SQRT(ST_Area(p_geom)) + ST_Perimeter(p_geom)/3.5 )/2.0  AS  d,
           p_factor*0.1 AS fract -- fraction metric
    ) t0
  )
  SELECT CASE WHEN p_both IS NOT NULL THEN ST_SimplifyPreserveTopology(geom,tolerance) ELSE geom END
  FROM (
    SELECT CASE   -- noise reducer:
       WHEN  p_both IS NULL OR p_both THEN ST_viz_simplify_noise(p_geom, tolerance*p_noiseFactor, 0.1)
       ELSE p_geom          -- no noise reduction
     END as geom,
     tolerance
    FROM diam_ref
  ) t1
$f$ LANGUAGE SQL IMMUTABLE;

CREATE or replace FUNCTION ST_viz_simplify_noiseafter(  -- simplify for VIsualiZation 
    p_geom        geometry,               -- input polygon
    p_both        boolean DEFAULT true,   -- 3state flag: NULL= only noise, true = both, false= only main
    p_factor      float   DEFAULT 0.1,       -- 1 = no reduction, 0.5 is 50% to tolerance control (100% default)
    p_noiseFactor float   DEFAULT 0.01    -- second factor, to smooth "noise", can be 100%, 10% or 1%.
) RETURNS geometry AS $f$
  WITH   diam_ref AS (
    SELECT *, d*fract AS tolerance 
    -- area metric of the SRID, not meters!
    FROM ( SELECT ( SQRT(ST_Area(p_geom)) + ST_Perimeter(p_geom)/3.5 )/2.0  AS  d,
           p_factor*0.1 AS fract -- fraction metric
    ) t0
  )
  SELECT CASE   -- noise reducer:
     WHEN p_both IS NULL OR p_both THEN ST_viz_simplify_noise(geom, tolerance*p_noiseFactor, 0.1)
     ELSE geom  -- no noise reduction
    END
  FROM (
    SELECT CASE WHEN p_both IS NOT NULL THEN ST_SimplifyPreserveTopology(p_geom,tolerance) ELSE p_geom END as geom,
           tolerance
    FROM diam_ref
  ) t1
$f$ LANGUAGE SQL IMMUTABLE;

---------------------------------
CREATE TABLE lix AS
  SELECT * FROM optim.jurisdiction_geom TABLESAMPLE BERNOULLI(0.8) where isolabel_ext like '%-%-%'
;
SELECT type, PERCENTILE_DISC(0.5) within group (order by abs(dif)) AS dif_absmdn,
       round(AVG(abs(dif)),4) as dif_absavg, min(abs(dif)) as dif_absmin, Max(abs(dif)) as dif_absmax,
       min(dif) as dif_min, Max(dif) as dif_max, count(*) as n
FROM (
 select  type, round(2*(km2-simple_km2)/(km2+simple_km2), 4) as dif
 from (
  SELECT isolabel_ext, round(st_area(geom,true)/1000000.0,2) as km2,
         round(st_area(ST_viz_simplify_noisebefore(geom,true,1),true)/1000000.0,2) as simple_km2,
         'before' AS type
  FROM lix
  UNION ALL
  SELECT isolabel_ext, round(st_area(geom,true)/1000000.0,2) as km2,
         round(st_area(ST_viz_simplify_noiseafter(geom,true,1),true)/1000000.0,2) as simple_km2, 
         'after' AS type
  FROM lix
  UNION ALL
  SELECT isolabel_ext, round(st_area(geom,true)/1000000.0,2) as km2,
         round(st_area(ST_viz_simplify_noiseafter(geom,false,1),true)/1000000.0,2) as simple_km2, 
         'main' AS type
  FROM lix
  UNION ALL
  SELECT isolabel_ext, round(st_area(geom,true)/1000000.0,2) as km2,
         round(st_area(ST_viz_simplify_noiseafter(geom,NULL,1),true)/1000000.0,2) as simple_km2, 
         'noise' AS type
  FROM lix
 ) t
) t2 GROUP BY type ORDER BY type;

With p_factor=0.1, the default:

type dif_absmdn dif_absavg dif_absmin dif_absmax dif_min dif_max n
after 0.0015 0.0022 0.0000 0.0153 -0.0080 0.0153 62
before 0.0016 0.0023 0.0000 0.0139 -0.0095 0.0139 62
main 0.0016 0.0022 0.0000 0.0154 -0.0079 0.0154 62
noise 0.0001 0.0002 0.0000 0.0011 -0.0011 0.0007 62

With p_factor=1, the default:

type dif_absmdn dif_absavg dif_absmin dif_absmax dif_min dif_max n
after 0.0389 0.0497 0.0003 0.1338 -0.1338 0.1304 62
before 0.0370 0.0513 0.0009 0.1701 -0.1323 0.1701 62
main 0.0387 0.0497 0.0000 0.1335 -0.1335 0.1305 62
noise 0.0015 0.0039 0.0000 0.0645 -0.0187 0.0645 62

Conclusion, the best is with noise reducer AFTER simplify and p_percent=1. Something more as :

  -- listing a sample of tests:
  SELECT * , round( 2*(km2-simple_km2)/(km2+simple_km2) ,4) AS dif FROM (
    SELECT isolabel_ext, round(st_area(geom,true)/1000000.0,2) as km2,
         round(st_area(ST_viz_simplify_noiseafter(geom,true,0.9),true)/1000000.0,2) as simple_km2
    FROM lix
  ) t ORDER BY ABS( 2*(km2-simple_km2)/(km2+simple_km2) ) DESC, 1;
isolabel_ext km2 simple_km2 dif
UY-CL-LaPedrera 0.16 0.12 0.2857
UY-RV-Masoller 0.29 0.23 0.2308
UY-CO-LosPinos 0.86 0.79 0.0848
UY-MA-LaBarra 3.13 3.35 -0.0679
BR-MS-Maracaju 5298.39 5618.21 -0.0586
BR-GO-Edealina 604.46 635.92 -0.0507
BR-PR-Jacarezinho 602.61 623.98 -0.0348
CO-CUN-Villagomez 64.74 62.65 0.0328
CO-VAC-SanPedro 199.63 205.40 -0.0285
BR-RS-Sobradinho 130.57 127.51 0.0237
BR-PR-SantaIzabelOeste 319.82 327.25 -0.0230
BR-GO-Indiara 961.18 983.08 -0.0225
... ... ... ..
BR-PA-Chaves 13081.58 13091.43 -0.0008
BR-MA-BuritiBravo 1580.20 1580.98 -0.0005
BR-MG-SaoRoqueMinas 2095.21 2094.22 0.0005
BR-MA-Brejo 1072.57 1072.84 -0.0003
BR-PA-CachoeiraArari 3111.57 3111.31 0.0001

Algumas heurísticas para resolver os raros casos de distorção de área:

  • aplicar apenas a redução de ruído, ST_viz_simplify_noise(), quando a geometria em si já possui poucos pontos, como no caso de Masoller.
  • usar fator mais baixo quando o "fator de alongamento" for alto (jurisdiction.info->'elongation_factor'>=4), novamente como no caso de Masoller.

Avaliando complexidade pelo numero de pontos

select ptsmin, round(avg(spp)) as spp_avg, round(stddev(spp)) as spp_dev from (
  select *, size/n_points as spp FROM (
    SELECT ptsmin, round(sqrt(st_area(geom,true))) as size, ST_NPoints(geom) n_points
    FROM optim.jurisdiction_geom,  (select 1000 as ptsmin union all select 5000 union all select 10000 ) t0
    WHERE isolabel_ext like '%-%-%'
  ) t1
  where n_points<=ptsmin   -- > or <=
) t2 group by ptsmin;
n_points spp_avg spp_dev
<1000 98 144
<5000 71 122
<10000 69 121
>1000 19 16 < 19
>5000 10 10
>10000 6 5 < 6

Usando o critério n_points>1000 temos uma chance razoável de medir a complexidade por "size per point", spp. Como a média é ~20 metros, podemos supor que 10 metros seja muito baixo, justificando uma simplificação maior por ST_SimplifyPreserveTopology.

Colocando as heuristicas em uma só função: ...


Lembretes