Getting Started

Installation

# From PyPI
pip install geofabric

# From source
git clone https://github.com/marcostfermin/GeoFabric.git
cd GeoFabric
pip install -e "."

# Optional dependencies
pip install -e ".[viz]"
pip install -e ".[stac]"
pip install -e ".[all]"
pip install -e ".[dev,all]"

Quick Start

import geofabric as gf

# Open a dataset (local Parquet file)
ds = gf.open("file:///path/to/data.parquet")

# Define a bounding-box region of interest (min_lon, min_lat, max_lon, max_lat)
roi = gf.roi.bbox(-74.10, 40.60, -73.70, 40.90)

# Filter to the ROI, keep all columns, cap at 1000 rows
q = ds.within(roi).select(["*"]).limit(1000)

# Export to common formats
q.to_parquet("out.parquet")
q.to_geojson("out.geojson")
q.to_geopackage("out.gpkg")

# Count matching features
print(q.aggregate({"count": "*"}))

# Interactive map (needs the viz extras) and vector tiles (needs tippecanoe)
q.show()
q.to_pmtiles("out.pmtiles", layer="features", maxzoom=14)

Open a Dataset

import geofabric as gf

# Local file
ds = gf.open("file:///path/to/data.parquet")

# S3 (public bucket)
ds = gf.open("s3://bucket/data.parquet")

# GCS
ds = gf.open("gs://bucket/data.parquet")

# Azure Blob Storage
ds = gf.open("az://container/data.parquet")

# PostGIS
ds = gf.open("postgresql://user:pass@host/db?table=mytable")

# STAC catalog
ds = gf.open("stac://earth-search.aws.element84.com/v1?collection=sentinel-2-l2a")

Configure Credentials

GeoFabric supports programmatic credential configuration for cloud storage, PostGIS, STAC catalogs, and HTTP:

import geofabric as gf

# S3 credentials
gf.configure_s3(
    access_key_id="AKIA...",
    secret_access_key="...",
    region="us-east-1"
)

# PostGIS defaults (allows shorter connection strings)
gf.configure_postgis(
    host="db.example.com",
    user="myuser",
    password="mypassword",
    sslmode="require"  # SSL mode for secure connections
)

# Azure Blob Storage
gf.configure_azure(
    account_name="mystorageaccount",
    account_key="..."
)

# STAC catalogs
gf.configure_stac(
    api_key="your-api-key",
    default_catalog="https://planetarycomputer.microsoft.com/api/stac/v1"
)

# HTTP settings (proxy, timeout, SSL)
gf.configure_http(
    proxy="http://corporate-proxy:8080",
    timeout=60,
    verify_ssl=True
)

# Now use gf.open() with configured credentials
ds = gf.open("s3://my-private-bucket/data.parquet?anonymous=false")
ds = gf.open("az://container/data.parquet")
ds = gf.open("postgresql:///mydb?table=public.buildings")

# Reset all configuration
gf.reset_config()

See the Authentication Guide for detailed setup instructions.

Query and Filter

# Define a region of interest
roi = gf.roi.bbox(-74.10, 40.60, -73.70, 40.90)

# Build a query
q = ds.within(roi).select(["id", "name", "geometry"]).limit(1000)

# Or filter with a SQL WHERE clause (starts a new query from the dataset)
q = ds.query().where("type = 'building'").limit(500)
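
Spatial and attribute filters should compose in a single chain, as in the Quick Start. A sketch, assuming where() can be chained after within():

# Buildings inside the ROI, capped at 500 rows
q = ds.within(roi).where("type = 'building'").select(["id", "name", "geometry"]).limit(500)
q.to_parquet("roi_buildings.parquet")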

Export Data

# Various output formats
q.to_parquet("out.parquet")
q.to_geojson("out.geojson")
q.to_geopackage("out.gpkg")
q.to_flatgeobuf("out.fgb")
q.to_csv("out.csv")

# Vector tiles (requires tippecanoe)
q.to_pmtiles("out.pmtiles", layer="features", maxzoom=14)

Spatial Operations

# Geometry transformations
q.buffer(distance=100, unit="meters")
q.simplify(tolerance=0.001)
q.transform(to_srid=3857)
q.centroid()
q.convex_hull()
q.dissolve(by="category")

# Add computed columns
q.with_area()
q.with_length()
q.with_bounds()
q.with_distance_to("POINT(0 0)")
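
These operations return queries, so they should chain with the filters and exports shown above. A sketch, assuming transformations compose the same way select() and limit() do:

# Reproject to Web Mercator, buffer by 100 m, add an area column, export
result = (
    ds.within(roi)
    .transform(to_srid=3857)
    .buffer(distance=100, unit="meters")
    .with_area()
)
result.to_parquet("buffered.parquet")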

Spatial Joins

buildings = gf.open("file:///buildings.parquet")
parcels = gf.open("file:///parcels.parquet")

# Join buildings to parcels
joined = buildings.query().sjoin(
    parcels.query(),
    predicate="intersects",
    how="inner"
)

# K-nearest neighbors
nearest = buildings.query().nearest(parcels.query(), k=3)
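
Join results are queries too, so the export methods above apply directly (assuming sjoin() and nearest() outputs support them):

joined.to_parquet("buildings_with_parcels.parquet")
nearest.to_geojson("nearest_parcels.geojson")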

Visualization

# Requires viz extras: pip install -e ".[viz]"
q.show()

CLI Usage

# Show help
gf --help

# Query data
gf sql file:///data.parquet "SELECT COUNT(*) FROM data"

# Extract subset
gf pull file:///data.parquet out.parquet --where "type='building'" --limit 1000

# Dataset info
gf info file:///data.parquet
gf stats file:///data.parquet
gf validate file:///data.parquet

# Spatial operations
gf buffer file:///data.parquet out.parquet --distance 100 --unit meters
gf simplify file:///data.parquet out.parquet --tolerance 0.001
gf transform file:///data.parquet out.parquet --to-srid 3857
gf dissolve file:///data.parquet out.parquet --by category
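
The CLI mirrors the Python API. The gf pull invocation above, for example, corresponds roughly to this library code:

import geofabric as gf

ds = gf.open("file:///data.parquet")
ds.query().where("type='building'").limit(1000).to_parquet("out.parquet")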

Overture Maps

from geofabric.sources.overture import Overture

# Download Overture data (requires AWS CLI)
ov = Overture(release="2025-12-17.0", theme="base", type_="infrastructure")
local_dir = ov.download("./data/overture")

# Query downloaded data
ds = gf.open(local_dir)
sample = ds.query().limit(10000)
sample.to_parquet("sample.parquet")
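
A downloaded Overture dataset behaves like any other, so the spatial filters above apply. A sketch clipping the infrastructure theme to a small area (the bbox coordinates are illustrative):

# Clip to a bounding box and export
roi = gf.roi.bbox(-74.02, 40.70, -73.97, 40.74)
ds.within(roi).select(["*"]).to_parquet("clip.parquet")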