utils module
hydrafloods.utils
decode_date(date)
Decodes a date from a command line argument, returning the parsed datetime object.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
date | str | date value in a format that can be parsed into a datetime object | required |
Returns:

Type | Description |
---|---|
datetime.datetime | decoded datetime value |
Exceptions:

Type | Description |
---|---|
TypeError | if the string does not conform to a legal date format. |
Source code in hydrafloods/utils.py

def decode_date(date):
    """Decodes a date from a command line argument, returning the parsed datetime object.

    args:
        date (str): date value in a format that can be parsed into a datetime object

    returns:
        datetime.datetime: decoded datetime value

    raises:
        TypeError: if the string does not conform to a legal date format.
    """
    date_formats = [
        "%Y%m%d",
        "%Y-%m-%d",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M:%S.%f",
    ]
    # try each accepted format in turn; the first successful parse wins
    for date_format in date_formats:
        try:
            dt = datetime.datetime.strptime(date, date_format)
            return dt
        except ValueError:
            continue
    raise TypeError(f"Invalid value for property of type 'date': '{date}'.")
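
For example, any of the accepted formats parses to a datetime, while an unrecognized format raises TypeError (a minimal sketch, assuming the module is importable as hydrafloods.utils):

from hydrafloods import utils

utils.decode_date("20210115")             # datetime.datetime(2021, 1, 15, 0, 0)
utils.decode_date("2021-01-15T12:30:00")  # datetime.datetime(2021, 1, 15, 12, 30)
utils.decode_date("Jan 15, 2021")         # raises TypeError
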
list_gcs_objs(bucket_path, pattern=None, output_url=False, project=None)
Function to list objects in a Google Cloud Storage bucket
Parameters:

Name | Type | Description | Default |
---|---|---|---|
bucket_path | str | Google Cloud Storage bucket name | required |
pattern | str \| None | glob pattern to search in the bucket. Folders can be searched by including folder names in the pattern (e.g. pattern='subfolder/*.txt'). If None then no search pattern is used | None |
output_url | bool | boolean switch to output Google Cloud Storage HTTP URLs instead of object URIs. If False then gs:// URIs are output | False |
project | str \| None | cloud project name to use when initializing the file system. If None then the default gcloud config is used | None |
Returns:

Type | Description |
---|---|
list[str] | list of objects in the bucket that match the pattern |
Source code in hydrafloods/utils.py

def list_gcs_objs(bucket_path, pattern=None, output_url=False, project=None):
    """Function to list objects in a Google Cloud Storage bucket

    args:
        bucket_path (str): Google Cloud Storage bucket name
        pattern (str | None, optional): glob pattern to search in the bucket.
            Folders can be searched by including folder names in the pattern
            (e.g. pattern='subfolder/*.txt'). If None then no search pattern
            is used. default = None
        output_url (bool, optional): boolean switch to output Google Cloud Storage
            HTTP URLs instead of object URIs. If False then gs:// URIs are output.
            default = False
        project (str | None, optional): cloud project name to use when initializing
            the file system. If None then the default gcloud config is used.
            default = None

    returns:
        list[str]: list of objects in the bucket that match the pattern
    """
    fs = gcsfs.GCSFileSystem(project=project)
    if pattern is not None:
        # ensure the bucket path ends with "/" before appending the pattern
        bucket_path = (
            bucket_path + "/" if not bucket_path.endswith("/") else bucket_path
        )
        blobs = fs.glob(f"{bucket_path}{pattern}")
    else:
        blobs = fs.ls(bucket_path)
    # format results as https:// URLs or gs:// URIs
    base = "https://storage.cloud.google.com/{0}" if output_url else "gs://{0}"
    return [base.format(blob) for blob in blobs]
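
A minimal usage sketch; the bucket name and pattern below are hypothetical:

from hydrafloods import utils

# list all GeoTIFFs under the "exports/" folder as gs:// URIs
tifs = utils.list_gcs_objs("my-bucket", pattern="exports/*.tif")

# the same objects as browsable https:// URLs
urls = utils.list_gcs_objs("my-bucket", pattern="exports/*.tif", output_url=True)
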
push_to_ee(bucket_obj, asset_collection, properties=None, delete_bucket_obj=False)
Helper function to begin the ingest process for imagery from GCS into GEE.
Thinly wraps the earthengine upload image command line.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
bucket_obj | str | GCS bucket object to ingest into GEE. Expects the object to have mime type image/tiff | required |
asset_collection | str | Earth Engine asset collection to push the object to | required |
properties | dict \| None | dictionary of property names and values to set when ingesting the file. If None then no properties are set | None |
delete_bucket_obj | bool | boolean switch to delete the GCS object once ingested into EE. If False then the file remains on GCS | False |
Source code in hydrafloods/utils.py

def push_to_ee(bucket_obj, asset_collection, properties=None, delete_bucket_obj=False):
    """Helper function to begin the ingest process for imagery from GCS into GEE
    Thinly wraps `earthengine upload image`

    args:
        bucket_obj (str): GCS bucket object to ingest into GEE. Expects the object
            to have mime type image/tiff
        asset_collection (str): Earth Engine asset collection to push the object to
        properties (dict | None, optional): dictionary of property names and values
            to set when ingesting the file. If None then no properties are set.
            default = None
        delete_bucket_obj (bool, optional): boolean switch to delete the GCS object
            once ingested into EE. If False then the file remains on GCS.
            default = False
    """
    name = os.path.basename(bucket_obj).replace(".", "_")
    # asset_collection is expected to end with a trailing "/"
    asset = asset_collection + name
    # build the "--<property> <value>" flags for the upload command;
    # guard against the default properties=None, which is not iterable
    pStr = ""
    if properties:
        for i in properties:
            pStr += "--{0} {1} ".format(i, properties[i])
    binPath = os.path.dirname(sys.executable)
    cmd = "{0}/earthengine upload image --asset_id={1} {2} {3}".format(
        binPath, asset, pStr, bucket_obj
    )
    proc = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
    )
    out, err = proc.communicate()
    # poll the most recent EE task until it completes or fails
    running = True
    while running:
        tasks = ee.batch.Task.list()
        if "COMPLETED" in str(tasks[0]):
            running = False
        elif "FAILED" in str(tasks[0]):
            print(
                "EE upload process failed for image {}, check Earth Engine for error".format(
                    bucket_obj
                )
            )
            sys.exit(1)
        # short delay between polls; assumes `time` is imported at module level
        time.sleep(5)
    if delete_bucket_obj:
        cmd = "gsutil rm {0}".format(bucket_obj)
        proc = subprocess.Popen(
            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        out, err = proc.communicate()
    return
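
A minimal usage sketch; the bucket object, collection path, and property values below are hypothetical. Note that each key in properties becomes a flag on the earthengine upload image call (e.g. --time_start), so keys must be flags that command accepts:

from hydrafloods import utils

utils.push_to_ee(
    "gs://my-bucket/exports/flood_map.tif",  # hypothetical GCS object
    "users/myuser/flood_maps/",              # hypothetical EE collection; note trailing "/"
    properties={"time_start": 1610668800000, "nodata_value": 0},
    delete_bucket_obj=True,
)
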
push_to_gcs(file, bucket_path)
Helper function to copy local files to Google Cloud Storage.
Thinly wraps the gsutil cp command line.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
file | str | local file path to push to GCS | required |
bucket_path | str | path on GCS to copy the file to | required |
Source code in hydrafloods/utils.py

def push_to_gcs(file, bucket_path):
    """Helper function to copy local files to Google Cloud Storage
    Thinly wraps `gsutil cp` command line

    args:
        file (str): local file path to push to GCS
        bucket_path (str): path on GCS to copy the file to
    """
    if os.path.exists(file):
        cmd = "gsutil cp {0} {1}".format(file, bucket_path)
        proc = subprocess.Popen(
            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        out, err = proc.communicate()
    else:
        raise ValueError('file "{0}" does not exist'.format(file))
    return
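
A minimal usage sketch; both paths below are hypothetical:

from hydrafloods import utils

# copy a local GeoTIFF into a bucket folder
utils.push_to_gcs("/tmp/flood_map.tif", "gs://my-bucket/exports/")
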