Postgis - Faster Spatial Join

  • Testing Environment
    • Two CentOS 7 virtual machines (1 master + 1 data host) run on my laptop (i5 2.4 GHz CPU, 8Gb RAM)
    • Database: Deepgreen
  • First follow the example on http://blog.cleverelephant.ca/2018/09/postgis-external-storage.html. Then use DGIS_sj_Intersects() instead of ST_Intersects()
  • Results:
    • ST_Intersects and normal storage: 444345.343 ms
    • ST_Intersects and external storage: 293202.550 ms
    • DGIS_sj_Intersects and external storage: 84984.251 ms
test-postgis-st_intersect.sh

#!/bin/bash

 

zip1=ne_10m_admin_0_countries.zip

zip2=ne_10m_populated_places.zip

link=https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural

 

if [ ! -f $zip1 ]; then

   wget $link/$zip1

   unzip -o $zip1

fi

if [ ! -f $zip2 ]; then

   wget $link/$zip2

   unzip -o $zip2

fi

 

db=test-postgis

sql=/tmp/test-postgis.sql

 

createdb $db

shp2pgsql -s 4326  ne_10m_admin_0_countries.shp countries | psql $db

shp2pgsql -s 4326  ne_10m_populated_places.shp places | psql $db

 

cat >$sql <<END

\timing on

 

SELECT count(*) as cnt, c.name

  FROM countries c

  JOIN places p

  ON ST_Intersects(c.geom, p.geom)

  GROUP BY c.name order by cnt desc limit 1;

 

ALTER TABLE countries

  ALTER COLUMN geom

  SET STORAGE EXTERNAL;

 

UPDATE countries

  SET geom = ST_SetSRID(geom, 4326);

 

SELECT count(*) as cnt, c.name

  FROM countries c

  JOIN places p

  ON ST_Intersects(c.geom, p.geom)

  GROUP BY c.name order by cnt desc limit 1;

 

SELECT count(*) as cnt, c.name

  FROM countries c

  JOIN places p

  ON DGIS_sj_Intersects(c.geom, p.geom)

  GROUP BY c.name order by cnt desc limit 1;

 

END

 

psql -d $db -f $sql

 

dropdb $db

Theme by Danetsoft and Danang Probo Sayekti inspired by Maksimer