Skip to content

thresholding module

hydrafloods.thresholding

bmax_otsu(img, band=None, region=None, scale=90, initial_threshold=0, thresh_no_data=None, invert=False, grid_size=0.1, bmax_threshold=0.75, iters=1, max_boxes=100, seed=7, max_buckets=255, min_bucket_width=0.001, max_raw=1000000.0, return_threshold=False)

Implementation of the B-Max Otsu thresholding algorithm. Detailed explanation of algorithm can be found at https://doi.org/10.3390/rs12152469

Parameters:

Name Type Description Default
img ee.Image

input image to thresholding algorithm

required
band str | None,optional

band name to use for thresholding, if set to None will use first band in image. default = None

None
region ee.Geometry | None

region to determine threshold, if set to None will use img.geometry(). default = None

None
scale int

scale at which to perform reduction operations, setting higher will prevent OOM errors. default = 90

90
initial_threshold float

initial estimate of water/no-water for estimating the probabilities of classes in segment. default = 0

0
thresh_no_data float

threshold to be used when histogram is empty (likely little to no water present in image). default = -0.2

None
invert bool

boolean switch to determine if to threshold greater than (True) or less than (False). default = False

False
grid_size float

size in decimal degrees to tile image/region to check for bimodality. default = 0.1

0.1
bmax_threshold float

value 0-1 to determine if a value of bmax is bimodal or not. default = 0.75

0.75
iters int

number of iterations to successively shrink the bmax tiles to refine threshold. 1 uses defined grid_size. default = 1

1
max_boxes int

maximum number of tiles/boxes to use when determining threshold, will select based on maximum bmax ordering. default = None

100
seed int

random number generator seed for randomly selected max_boxes. default = 7

7
max_buckets int

The maximum number of buckets to use when building a histogram; will be rounded up to a power of 2. default = 255

255
min_bucket_width float

The minimum histogram bucket width to allow any power of 2. default = 0.001

0.001
max_raw int

The number of values to accumulate before building the initial histogram. default = 1e6

1000000.0
return_threshold bool

boolean switch, if set to true then function will return threshold number, else thresholded image. default = False

False

Returns:

Type Description
ee.Image

thresholded image based (if return_threshold==False) or threshold value (if return_threshold==True) based on the threshold determined by the algorithm

Source code in hydrafloods/thresholding.py
@decorators.keep_attrs
def bmax_otsu(
    img,
    band=None,
    region=None,
    scale=90,
    initial_threshold=0,
    thresh_no_data=None,
    invert=False,
    grid_size=0.1,
    bmax_threshold=0.75,
    iters=1,
    max_boxes=100,
    seed=7,
    max_buckets=255,
    min_bucket_width=0.001,
    max_raw=1e6,
    return_threshold=False,
):
    """Implementation of the B-Max Otsu thresholding algorithm.
    Detailed explanation of algorithm can be found at https://doi.org/10.3390/rs12152469

    args:
        img (ee.Image): input image to thresholding algorithm
        band (str | None,optional): band name to use for thresholding, if set to `None` will use first band in image. default = None
        region (ee.Geometry | None, optional): region to determine threshold, if set to `None` will use img.geometry(). default = None
        scale (int, optional): scale at which to perform reduction operations, setting higher will prevent OOM errors. default = 90
        initial_threshold (float, optional): initial estimate of water/no-water for estimating the probabilities of classes in segment. default = 0
        thresh_no_data (float, optional): threshold to be used when histogram is empty (likely little to no water present in image). default = -0.2
        invert (bool, optional): boolean switch to determine if to threshold greater than (True) or less than (False). default = False
        grid_size (float, optional): size in decimal degrees to tile image/region to check for bimodality. default = 0.1
        bmax_threshold (float, optional): value 0-1 to determine if a value of bmax is bimodal or not. default = 0.75
        iters (int, optional): number of iterations to successively shrink the bmax tiles to refine threshold. 1 uses defined grid_size. default = 1
        max_boxes (int, optional): maximum number of tiles/boxes to use when determining threshold, will select based on maximum bmax ordering. default = None
        seed (int, optional): random number generator seed for randomly selected max_boxes. default = 7
        max_buckets (int, optional): The maximum number of buckets to use when building a histogram; will be rounded up to a power of 2. default = 255
        min_bucket_width (float, optional): The minimum histogram bucket width to allow any power of 2. default = 0.001
        max_raw (int, optional): The number of values to accumulate before building the initial histogram. default = 1e6
        return_threshold (bool, optional): boolean switch, if set to true then function will return threshold number, else thresholded image. default = False

    returns:
        ee.Image: thresholded image based (if return_threshold==False) or threshold value (if return_threshold==True) based on the threshold determined by the algorithm
    """

    def calcBmax(feature):
        """Closure function to calculate Bmax for each feature covering image"""
        segment = img
        initial = segment.lt(initial_threshold)
        p1 = ee.Number(
            initial.reduceRegion(
                reducer=ee.Reducer.mean(),
                geometry=feature.geometry(),
                bestEffort=True,
                scale=scale,
            ).get(histBand)
        )
        p1 = ee.Number(ee.Algorithms.If(p1, p1, 0.9999))
        p2 = ee.Number(1).subtract(p1)

        m = (
            segment.updateMask(initial)
            .rename("m1")
            .addBands(segment.updateMask(initial.Not()).rename("m2"))
        )

        mReduced = m.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=feature.geometry(),
            bestEffort=True,
            scale=scale,
        )

        m1 = ee.Number(mReduced.get("m1"))
        m2 = ee.Number(mReduced.get("m2"))

        m1 = ee.Number(ee.Algorithms.If(m1, m1, -25))
        m2 = ee.Number(ee.Algorithms.If(m2, m2, 0))

        sigmab = p1.multiply(p2.multiply(m1.subtract(m2).pow(2)))
        sigmat = ee.Number(
            segment.reduceRegion(
                reducer=ee.Reducer.variance(),
                geometry=feature.geometry(),
                bestEffort=True,
                scale=scale,
            ).get(histBand)
        )
        sigmat = ee.Number(ee.Algorithms.If(sigmat, sigmat, 2))
        bmax = sigmab.divide(sigmat)
        return feature.set({"bmax": bmax})

    if band is None:
        img = img.select([0])
        histBand = ee.String(img.bandNames().get(0))

    else:
        histBand = ee.String(band)
        img = img.select(histBand)

    if region is None:
        region = img.geometry()

    grid = geeutils.tile_region(region, centroid_within=region, grid_size=grid_size)

    bmaxes = grid.map(calcBmax).filter(ee.Filter.gt("bmax", bmax_threshold))

    if iters > 1:
        for i in range(iters - 1):
            grid_size /= 2.0
            grid = geeutils.tile_region(
                bmaxes.geometry(), centroid_within=bmaxes, grid_size=grid_size
            )

            bmaxes = grid.map(calcBmax).filter(ee.Filter.gt("bmax", bmax_threshold))

    if max_boxes is not None:
        selection = bmaxes.limit(max_boxes, "bmax")
    else:
        selection = bmaxes

    histogram = img.reduceRegion(
        ee.Reducer.histogram(max_buckets, min_bucket_width, max_raw)
        .combine("mean", None, True)
        .combine("variance", None, True),
        selection,
        scale,
        bestEffort=True,
        tileScale=16,
    )

    if thresh_no_data is not None:
        threshold = ee.Number(
            ee.Algorithms.If(
                ee.Dictionary(histogram.get(histBand.cat("_histogram"))).contains(
                    "bucketMeans"
                ),
                otsu(histogram.get(histBand.cat("_histogram"))),
                thresh_no_data,
            )
        )
    else:
        threshold = otsu(histogram.get(histBand.cat("_histogram")))

    if return_threshold is True:
        return ee.Image(threshold)
    else:
        water = ee.Image(ee.Algorithms.If(invert, img.gt(threshold), img.lt(threshold)))
        return water.rename("water").uint8()

edge_otsu(img, band=None, region=None, scale=90, initial_threshold=0, thresh_no_data=None, invert=False, canny_threshold=0.05, canny_sigma=0, canny_lt=0.05, connected_pixels=200, edge_length=50, edge_buffer=100, max_buckets=255, min_bucket_width=0.001, max_raw=1000000.0, return_threshold=False)

Implementation of the Edge Otsu thresholding algorithm. Detailed explanation of algorithm can be found at https://doi.org/10.3390/rs12152469

Parameters:

Name Type Description Default
img ee.Image

input image to thresholding algorithm

required
band str | None,optional

band name to use for thresholding, if set to None will use first band in image. default = None

None
region ee.Geometry | None

region to determine threshold, if set to None will use img.geometry(). default = None

None
scale int

scale at which to perform reduction operations, setting higher will prevent OOM errors. default = 90

90
initial_threshold float

initial estimate of water/no-water for estimating the edges. default = 0

0
thresh_no_data float

threshold to be used when histogram is empty (likely little to no water present in image). default = -0.2

None
invert bool

boolean switch to determine if to threshold greater than (True) or less than (False). default = False

False
canny_threshold float

threshold for canny edge detection. default = 0.05

0.05
canny_sigma float

sigma value for gaussian filter in canny edge detection. default = 0

0
canny_lt float

lower threshold for canny detection. default = 0.05

0.05
connected_pixels int

maximum size of the neighborhood in pixels to determine if connected. default = 200

200
edge_length int

minimum length of edges from canny detection to be considered edge. default = 50

50
edge_buffer int

distance in meters to buffer edges on a side for histogram sampling. default = 100

100
max_buckets int

The maximum number of buckets to use when building a histogram; will be rounded up to a power of 2. default = 255

255
min_bucket_width float

The minimum histogram bucket width to allow any power of 2. default = 0.001

0.001
max_raw int

The number of values to accumulate before building the initial histogram. default = 1e6

1000000.0
return_threshold bool

boolean switch, if set to true then function will return threshold number, else thresholded image. default = False

False

Returns:

Type Description
ee.Image

thresholded image based (if return_threshold==False) or threshold value (if return_threshold==True) based on the threshold determined by the algorithm

Source code in hydrafloods/thresholding.py
@decorators.keep_attrs
def edge_otsu(
    img,
    band=None,
    region=None,
    scale=90,
    initial_threshold=0,
    thresh_no_data=None,
    invert=False,
    canny_threshold=0.05,
    canny_sigma=0,
    canny_lt=0.05,
    connected_pixels=200,
    edge_length=50,
    edge_buffer=100,
    max_buckets=255,
    min_bucket_width=0.001,
    max_raw=1e6,
    return_threshold=False,
):
    """Implementation of the Edge Otsu thresholding algorithm.
    Detailed explanation of algorithm can be found at https://doi.org/10.3390/rs12152469

    args:
        img (ee.Image): input image to thresholding algorithm
        band (str | None,optional): band name to use for thresholding, if set to `None` will use first band in image. default = None
        region (ee.Geometry | None, optional): region to determine threshold, if set to `None` will use img.geometry(). default = None
        scale (int, optional): scale at which to perform reduction operations, setting higher will prevent OOM errors. default = 90
        initial_threshold (float, optional): initial estimate of water/no-water for estimating the edges. default = 0
        thresh_no_data (float, optional): threshold to be used when histogram is empty (likely little to no water present in image). default = -0.2
        invert (bool, optional): boolean switch to determine if to threshold greater than (True) or less than (False). default = False
        canny_threshold (float, optional): threshold for canny edge detection. default = 0.05
        canny_sigma (float, optional): sigma value for gaussian filter in canny edge detection. default = 0
        canny_lt (float, optional): lower threshold for canny detection. default = 0.05
        connected_pixels (int, optional): maximum size of the neighborhood in pixels to determine if connected. default = 200
        edge_length (int, optional): minimum length of edges from canny detection to be considered edge. default = 50
        edge_buffer (int, optional): distance in meters to buffer edges on a side for histogram sampling. default = 100
        max_buckets (int, optional): The maximum number of buckets to use when building a histogram; will be rounded up to a power of 2. default = 255
        min_bucket_width (float, optional): The minimum histogram bucket width to allow any power of 2. default = 0.001
        max_raw (int, optional): The number of values to accumulate before building the initial histogram. default = 1e6
        return_threshold (bool, optional): boolean switch, if set to true then function will return threshold number, else thresholded image. default = False

    returns:
        ee.Image: thresholded image based (if return_threshold==False) or threshold value (if return_threshold==True) based on the threshold determined by the algorithm
    """

    if band is None:
        img = img.select([0])
        histBand = ee.String(img.bandNames().get(0))

    else:
        histBand = ee.String(band)
        img = img.select(histBand)

    if region is None:
        region = img.geometry()

    binary = img.lt(initial_threshold).rename("binary")

    # get canny edges
    canny = ee.Algorithms.CannyEdgeDetector(binary, canny_threshold, canny_sigma)
    # process canny edges
    connected = (
        canny.mask(canny).lt(canny_lt).connectedPixelCount(connected_pixels, True)
    )
    edges = connected.gte(edge_length)
    edgeBuffer = edges.focal_max(edge_buffer, "square", "meters")

    # mask out areas to get histogram for Otsu
    histogram_image = img.updateMask(edgeBuffer)

    histogram = histogram_image.reduceRegion(
        ee.Reducer.histogram(max_buckets, min_bucket_width, max_raw)
        .combine("mean", None, True)
        .combine("variance", None, True),
        region,
        scale,
        bestEffort=True,
        tileScale=16,
    )

    if thresh_no_data is not None:
        threshold = ee.Number(
            ee.Algorithms.If(
                ee.Dictionary(histogram.get(histBand.cat("_histogram"))).contains(
                    "bucketMeans"
                ),
                otsu(histogram.get(histBand.cat("_histogram"))),
                thresh_no_data,
            )
        )
    else:
        threshold = otsu(histogram.get(histBand.cat("_histogram")))

    if return_threshold is True:
        return threshold
    else:
        water = ee.Image(ee.Algorithms.If(invert, img.gt(threshold), img.lt(threshold)))
        return water.rename("water").uint8()

fuzzy_otsu(img, band=None, region=None, scale=90, initial_threshold=-15.5, grid_size=0.1, max_boxes=20, seed=0, max_buckets=255, min_bucket_width=0.001, max_raw=1000000.0)

Implementation of Otsu thresholding algorithm. Segment the grids into water and land representation using initial threshold and randomly select features to calculate minimum and maximum threshold of the image. Calculate Midpoint of min and max threshold using Fuzzy_Gaussian to map water.

Parameters:

Name Type Description Default
img ee.Image

input image to thresholding algorithm

required
band str | None,optional

band name to use for thresholding, if set to None will use first band in image. default = None

None
region ee.Geometry | None

region to determine threshold, if set to None will use img.geometry(). default = None

None
scale int

scale at which to perform reduction operations, setting higher will prevent OOM errors. default = 90

90
initial_threshold float

initial estimate of water/no-water for estimating the probabilities of classes in segment. default = -15.5

-15.5
grid_size float

size in decimal degrees to tile image/region to check for bimodality. default = 0.1

0.1
max_boxes int

maximum number of tiles/boxes to use when determining threshold. default = 20

20
seed int

random number generator seed for randomly selected max_boxes. default = 7

0
max_buckets int

The maximum number of buckets to use when building a histogram; will be rounded up to a power of 2. default = 255

255
min_bucket_width float

The minimum histogram bucket width to allow any power of 2. default = 0.001

0.001
max_raw int

The number of values to accumulate before building the initial histogram. default = 1e6

1000000.0

Returns:

Type Description
ee.Image

thresholded water image.

Source code in hydrafloods/thresholding.py
@decorators.keep_attrs
def fuzzy_otsu(
    img,
    band=None,
    region=None,
    scale=90,
    initial_threshold=-15.5,
    grid_size=0.1,
    max_boxes=20,
    seed=0,
    max_buckets=255,
    min_bucket_width=0.001,
    max_raw=1e6,
):
    """Implementation of Otsu thresholding algorithm.
    Segment the grids into water and land representation using initial threshold and randomly select features to calculate
    minimum and maximum threshold of the image. Calculate Midpoint of min and max threshold using Fuzzy_Gaussian to map water.


    args:
        img (ee.Image): input image to thresholding algorithm
        band (str | None,optional): band name to use for thresholding, if set to `None` will use first band in image. default = None
        region (ee.Geometry | None, optional): region to determine threshold, if set to `None` will use img.geometry(). default = None
        scale (int, optional): scale at which to perform reduction operations, setting higher will prevent OOM errors. default = 90
        initial_threshold (float, optional): initial estimate of water/no-water for estimating the probabilities of classes in segment. default = -15.5
        grid_size (float, optional): size in decimal degrees to tile image/region to check for bimodality. default = 0.1
        max_boxes (int, optional): maximum number of tiles/boxes to use when determining threshold. default = 20
        seed (int, optional): random number generator seed for randomly selected max_boxes. default = 7
        max_buckets (int, optional): The maximum number of buckets to use when building a histogram; will be rounded up to a power of 2. default = 255
        min_bucket_width (float, optional): The minimum histogram bucket width to allow any power of 2. default = 0.001
        max_raw (int, optional): The number of values to accumulate before building the initial histogram. default = 1e6

    returns:
        ee.Image: thresholded water image.
    """

    # Function to Segment Land and Water Grids
    def segment_grid(feature):
        counts = initImg.reduceRegion(
            reducer=ee.Reducer.sum(),
            geometry=feature.geometry(),
            bestEffort=True,
            scale=scale,
        )

        return feature.set(counts)

    if region is None:
        region = img.geometry()

    if band is None:
        img = img.select([0])
        histBand = ee.String(img.bandNames().get(0))

    else:
        histBand = ee.String(band)
        img = img.select(histBand)

    grid = geeutils.tile_region(region, intersect_geom=region, grid_size=grid_size)

    # Prepare good representation of water and land
    initWater = img.lt(initial_threshold).rename("water")
    initImg = initWater.addBands(initWater.Not().rename("land"))

    grid = grid.map(segment_grid)

    # Select water and land grid
    selection = grid.filter(
        ee.Filter.And(ee.Filter.gt("water", 1000), ee.Filter.gt("land", 1000))
    )

    # Randomly select features
    selection = selection.randomColumn("random")
    nBoxes = ee.Number(selection.size())
    randomSelection = ee.Number(max_boxes).divide(nBoxes)
    selection = selection.filter(ee.Filter.lt("random", randomSelection))

    # Create histogram for selected features to calculate min threshold
    histogram = img.reduceRegion(
        ee.Reducer.histogram(max_buckets, min_bucket_width, max_raw)
        .combine("mean", None, True)
        .combine("variance", None, True),
        selection.geometry(),
        scale,
        bestEffort=True,
        tileScale=16,
    )

    min_threshold = otsu(histogram.get(histBand.cat("_histogram")))

    # Create histogram to calculate max threshold
    histogram2 = img.updateMask(img.lt(min_threshold)).reduceRegion(
        ee.Reducer.histogram(max_buckets, min_bucket_width, max_raw)
        .combine("mean", None, True)
        .combine("variance", None, True),
        selection.geometry(),
        scale,
        bestEffort=True,
        tileScale=16,
    )

    max_threshold = otsu(histogram2.get(histBand.cat("_histogram")))

    # Calculate Midpoint of min and max threshold using Fuzzy_Gaussian
    midpoint = ee.Number(ee.Number(min_threshold).add(ee.Number(max_threshold))).divide(
        2
    )
    spread = 0.2

    gauss = fuzzy.fuzzy_gaussian(img, midpoint, spread).clip(region)

    waterImg = img.lt(midpoint)

    waterImg = waterImg.where(waterImg.eq(0), gauss)

    return ee.Image(waterImg)

kmeans_extent(img, hand, initial_threshold=0, region=None, band=None, n_samples=500, seed=7, invert=False, scale=90)

Water thresholding methodology using image values and HAND. Method from https://doi.org/10.1016/j.rse.2020.111732

Parameters:

Name Type Description Default
img ee.Image

input image to thresholding algorithm

required
hand ee.Image

Height Above Nearest Drainage image used as axis in clustering

required
initial_threshold float

initial estimate of water/no-water for stratified sampling. default = 0

0
region ee.Geometry | None

region to sample values for KMeans clustering, if set to None will use img.geometry(). default = None

None
band str | None,optional

band name to use for thresholding, if set to None will use first band in image. default = None

None
samples int

number of stratified samples to gather for clustering from the initial water/no-water map. default=500

required
seed int

random number generator seed for sampling. default = 7

7
scale int

scale at which to perform reduction operations, setting higher will prevent OOM errors. default = 90

90

Returns:

Type Description
ee.Image

clustered image from KMeans clusterer. Classes assumed to be water/no-water

Source code in hydrafloods/thresholding.py
def kmeans_extent(
    img,
    hand,
    initial_threshold=0,
    region=None,
    band=None,
    n_samples=500,
    seed=7,
    invert=False,
    scale=90,
):
    """Water thresholding methodology using image values and HAND.
    Method from https://doi.org/10.1016/j.rse.2020.111732

    args:
        img (ee.Image): input image to thresholding algorithm
        hand (ee.Image): Height Above Nearest Drainage image used as axis in clustering
        initial_threshold (float, optional): initial estimate of water/no-water for stratified sampling. default = 0
        region (ee.Geometry | None, optional): region to sample values for KMeans clustering, if set to `None` will use img.geometry(). default = None
        band (str | None,optional): band name to use for thresholding, if set to `None` will use first band in image. default = None
        samples (int, optional): number of stratified samples to gather for clustering from the initial water/no-water map. default=500
        seed (int, optional): random number generator seed for sampling. default = 7
        scale (int, optional): scale at which to perform reduction operations, setting higher will prevent OOM errors. default = 90

    returns:
        ee.Image: clustered image from KMeans clusterer. Classes assumed to be water/no-water

    """

    def _cluster_center(x):
        return ee.Number(
            (
                band_arr.mask(classes.eq(ee.Number(x))).reduce(ee.Reducer.mean(), [0])
            ).get([0])
        )

    if region is None:
        region = img.geometry()

    if band is None:
        img = img.select([0])
        band = ee.String(img.bandNames().get(0))

    else:
        img = img.select(band)

    hand_band = ee.String(hand.bandNames().get(0))

    # convert invert to a number 0 or 1 for ee server-side
    invert = 1 if invert else 0

    img = img.addBands(hand.unmask(0))

    strata = img.select(band).gt(initial_threshold).rename("strata")

    samples = img.addBands(strata).stratifiedSample(
        numPoints=n_samples,
        classBand="strata",
        region=region,
        scale=scale,
        classValues=[0, 1],
        classPoints=[n_samples, n_samples],
        tileScale=16,
        seed=seed,
    )

    clusterer = ee.Clusterer.wekaKMeans(2, 1).train(samples, [band, hand_band])

    samples = samples.cluster(clusterer, "classes")
    classes = samples.aggregate_array("classes")
    unique = classes.distinct().sort()
    classes = ee.Array(classes)
    band_arr = ee.Array(samples.aggregate_array(band))

    class_means = unique.map(_cluster_center)

    min_mean = class_means.reduce(ee.Reducer.min())

    water_class = class_means.indexOf(min_mean)

    water = img.cluster(clusterer)

    water = ee.Image(ee.Algorithms.If(water_class.eq(0), water.Not(), water))

    return water.rename("water").uint8()

modis_flood(img)

Implementation of the NASA MODIS Flood algorithm. Expects that imagery has

https://cdn.earthdata.nasa.gov/conduit/upload/17162/MCDWD_UserGuide_RevB.pdf

Parameters:

Name Type Description Default
img ee.Image

image to apply algorithm to, should be either MODIS or VIIRS sensor data

required

Returns:

Type Description
ee.Image

resulting water map

Source code in hydrafloods/thresholding.py
@decorators.keep_attrs
def modis_flood(img):
    """Implementation of the NASA  MODIS Flood algorithm. Expects that imagery
    has

    https://cdn.earthdata.nasa.gov/conduit/upload/17162/MCDWD_UserGuide_RevB.pdf

    args:
        img (ee.Image): image to apply algorithm to, should be either MODIS or VIIRS sensor data

    returns:
        ee.Image: resulting water map

    """
    # coefficients for the algorithm
    a = 0.0013500
    b = 0.10811
    c = 0.7
    d = 0.2027
    e = 0.06757

    water = img.expression("((nir+a)/(red+b) < c) and (red < d) and (swir2 < e)",{
        "red": img.select("red"),
        "nir": img.select("nir"),
        "swir2": img.select("swir2"),
        "a": a,
        "b": b,
        "c": c,
        "d": d,
        "e": e
    }).uint8().rename("water")

    return water

multidim_semisupervised(img, bands, rank_band=None, ranking='min', region=None, n_samples=500, seed=7, scale=90, proba_threshold=None)

Implementation of the Automatic water detection from multidimensional hierarchical clustering algorithm. Method details: https://doi.org/10.1016/j.rse.2020.112209 Note: this is a similar method, not exact from the paper

Parameters:

Name Type Description Default
img ee.Image

input image to thresholding algorithm

required
bands list | ee.List

band names to use for the semi supervised classification

required
rank_band str

band name used to rank which unserpervised class is water. If None then first band name in bands is used. default = None

None
ranking str

method to rank the classes by rank_band. Options are 'min' or 'max'. If 'min', then the lowest class mean is considered water. default = 'min'

'min'
region ee.Geometry | None

region to sample values for KMeans clustering, if set to None will use img.geometry(). default = None

None
samples int

number of stratified samples to gather for clustering from the initial water/no-water map. default=500

required
seed int

random number generator seed for sampling. default = 7

7
scale int

scale at which to perform reduction operations, setting higher will prevent OOM errors. default = 90

90
proba_threshold float

probability threshold to create a binary water mask, should be in range of 0-1. If None, then the probability values are returned. default = None

None
Source code in hydrafloods/thresholding.py
def multidim_semisupervised(
    img,
    bands,
    rank_band=None,
    ranking="min",
    region=None,
    n_samples=500,
    seed=7,
    scale=90,
    proba_threshold=None,
):
    """Implementation of the Automatic water detection from
    multidimensional hierarchical clustering algorithm.
    Method details: https://doi.org/10.1016/j.rse.2020.112209
    Note: this is a similar method, not exact from the paper

    args:
        img (ee.Image): input image to thresholding algorithm
        bands (list | ee.List): band names to use for the semi supervised classification
        rank_band (str, optional): band name used to rank which unserpervised class is water. If None then first band name in `bands` is used. default = None
        ranking (str, optional): method to rank the classes by `rank_band`. Options are 'min' or 'max'. If 'min', then the lowest class mean is considered water. default = 'min'
        region (ee.Geometry | None, optional): region to sample values for KMeans clustering, if set to `None` will use img.geometry(). default = None
        samples (int, optional): number of stratified samples to gather for clustering from the initial water/no-water map. default=500
        seed (int, optional): random number generator seed for sampling. default = 7
        scale (int, optional): scale at which to perform reduction operations, setting higher will prevent OOM errors. default = 90
        proba_threshold (float, optional): probability threshold to create a binary water mask, should be in range of 0-1. If None, then the probability values are returned. default = None
    """

    if region is None:
        region = img.geometry()

    if bands is None:
        bands = img.bandNames()
    else:
        bands = ee.List(bands)

    if rank_band is None:
        rank_band = ee.String(bands.get(0))

    samples = img.select(bands).sample(
        region=region,
        scale=scale,
        numPixels=n_samples,
        seed=seed,
        dropNulls=True,
        tileScale=16,
    )

    classifier = ml.unsupervised_rf(
        100, samples, features=bands, rank_feature=rank_band, ranking=ranking
    )

    probas = img.select(bands).classify(classifier)

    if proba_threshold is None:
        output = probas.rename("water_proba")
    else:
        output = probas.gt(proba_threshold).rename("water").uint8()

    return output

otsu(histogram)

Otsu's method threhsolding algorithm. Computes single intensity threshold that separate histogram into two classes, foreground and background

Parameters:

Name Type Description Default
histogram ee.Dictionary

computed object from ee.Reducer.histogram with keys "histogram" and "bucketMeans"

required

Returns:

Type Description
ee.Number

value of maximum inter-class intensity variance based on histogram

Source code in hydrafloods/thresholding.py
def otsu(histogram):
    """Otsu's method threhsolding algorithm.
    Computes single intensity threshold that separate histogram into two classes, foreground and background

    args:
        histogram (ee.Dictionary): computed object from ee.Reducer.histogram with keys "histogram" and "bucketMeans"

    returns:
        ee.Number: value of maximum inter-class intensity variance based on histogram
    """
    counts = ee.Array(ee.Dictionary(histogram).get("histogram"))
    means = ee.Array(ee.Dictionary(histogram).get("bucketMeans"))
    size = means.length().get([0])
    total = counts.reduce(ee.Reducer.sum(), [0]).get([0])
    sums = means.multiply(counts).reduce(ee.Reducer.sum(), [0]).get([0])
    mean = sums.divide(total)
    indices = ee.List.sequence(1, size)
    # Compute between sum of squares, where each mean partitions the data.

    def bss_function(i):
        aCounts = counts.slice(0, 0, i)
        aCount = aCounts.reduce(ee.Reducer.sum(), [0]).get([0])
        aMeans = means.slice(0, 0, i)
        aMean = (
            aMeans.multiply(aCounts)
            .reduce(ee.Reducer.sum(), [0])
            .get([0])
            .divide(aCount)
        )
        bCount = total.subtract(aCount)
        bMean = sums.subtract(aCount.multiply(aMean)).divide(bCount)
        return aCount.multiply(aMean.subtract(mean).pow(2)).add(
            bCount.multiply(bMean.subtract(mean).pow(2))
        )

    bss = indices.map(bss_function)
    output = means.sort(bss).get([-1])
    return ee.Number(output)