Tuesday, April 27, 2021

Geohash Python example

Here's a simple Geohash encode/decode implementation. The decoding function pretty closely follows the technique layed out on the Geohash Wikipedia entry.

from fractions import Fraction

"""
First decode the special geohash variant of base32 encoding.
Each encoded digit (0-9-b..z) (not continuous abc) is a 5 bit val 0,1,2...,30,31.
In the resulting bitstream, every second bit is now for latitude and longtitude.
Initially the latitutde range is -90,+90.
When a latitude bit is 1, then it now starts at the mid of these.
Else if 0 it now ends at the mid of these.
Same for longtitude but with range -180,+180.
"""
def decode_geohash(s):
    alphabet_32ghs = "0123456789bcdefghjkmnpqrstuvwxyz"
    dec_from_32ghs = dict()
    for i, c in enumerate(alphabet_32ghs):
        dec_from_32ghs[c] = i

    bits = 0  # Integer representation of hash.
    bit_cnt = 0
    for c in s:
        bits = (bits << 5) | dec_from_32ghs[c]
        bit_cnt += 5

    # Every second bit is longtitude and latitude. Digits in even positions are latitude.
    lat_bits, lon_bits = 0, 0
    lat_bit_cnt = bit_cnt // 2
    lon_bit_cnt = lat_bit_cnt
    if bit_cnt % 2 == 1:
        lon_bit_cnt += 1

    for i in range(bit_cnt):
        cur_bit_pos = bit_cnt - i
        cur_bit = (bits & (1 << cur_bit_pos)) >> cur_bit_pos
        if i % 2 == 0:
            lat_bits |= cur_bit << (cur_bit_pos//2)
        else:
            lon_bits |= cur_bit << (cur_bit_pos//2)

    lat_start, lat_end = Fraction(-90), Fraction(90)
    for cur_bit_pos in range(lat_bit_cnt-1, -1, -1):
        mid = (lat_start + lat_end) / 2
        if lat_bits & (1 << cur_bit_pos):
            lat_start = mid
        else:
            lat_end = mid

    lon_start, lon_end = Fraction(-180), Fraction(180)
    for cur_bit_pos in range(lon_bit_cnt-1, -1, -1):
        mid = (lon_start + lon_end) / 2
        if lon_bits & (1 << cur_bit_pos):
            lon_start = mid
        else:
            lon_end = mid

    return float(lat_start), float(lat_end), float(lon_start), float(lon_end)


# Inspired by https://www.factual.com/blog/how-geohashes-work/
def encode_geohash(lat, lon, bit_cnt):
    if bit_cnt % 5 != 0:
        raise ValueError("bit_cnt must be divisible by 5")

    bits = 0
    lat_start, lat_end = Fraction(-90), Fraction(90)
    lon_start, lon_end = Fraction(-180), Fraction(180)
    for i in range(bit_cnt):
        if i % 2 == 0:
            mid = (lon_start + lon_end) / 2
            if lon < mid:
                bits = (bits << 1) | 0
                lon_end = mid
            else:
                bits = (bits << 1) | 1
                lon_start = mid
        else:
            mid = (lat_start + lat_end) / 2
            if lat < mid:
                bits = (bits << 1) | 0
                lat_end = mid
            else:
                bits = (bits << 1) | 1
                lat_start = mid

    print("bits: {:>b}".format(bits))

    # Do the special geohash base32 encoding.
    s = ""
    alphabet_32ghs = "0123456789bcdefghjkmnpqrstuvwxyz"
    for i in range(bit_cnt // 5):
        idx = (bits >> i*5) & (1 | 2 | 4 | 8 | 16)
        s += alphabet_32ghs[idx]
    return s[::-1]


print(decode_geohash("ezs42"))
print(decode_geohash("9q8y"))
print(encode_geohash(37.7, -122.5, 20))

No comments:

Post a Comment