[AIRFLOW-2037] Add methods to get Hash values of a GCS object
- Added `get_md5hash` and `get_crc32c` to `gcs_hook` to aid in data integrity validation. Closes #2977 from kaxil/hashing_gcs_hook
This commit is contained in:
Родитель
48202ad5bd
Коммит
80d2ee8acc
|
@ -297,3 +297,55 @@ class GoogleCloudStorageHook(GoogleCloudBaseHook):
|
|||
except errors.HttpError as ex:
|
||||
if ex.resp['status'] == '404':
|
||||
raise ValueError('Object Not Found')
|
||||
|
||||
def get_crc32c(self, bucket, object):
    """
    Gets the CRC32c checksum of an object in Google Cloud Storage.

    :param bucket: The Google cloud storage bucket where the object is.
    :type bucket: string
    :param object: The name of the object to check in the Google cloud
        storage bucket.
    :type object: string
    :return: The value of the ``crc32c`` field of the object metadata
        returned by the API.
    :raises ValueError: If the API responds with HTTP status 404
        (the object does not exist).
    :raises errors.HttpError: For any other HTTP error returned by the
        API; these are propagated unchanged to the caller.
    """
    self.log.info('Retrieving the crc32c checksum of '
                  'object: %s in bucket: %s', object, bucket)
    service = self.get_conn()
    # Only the API call itself is guarded, so unrelated failures (for
    # example a missing key in the response) are not mistaken for HTTP
    # errors.
    try:
        response = service.objects().get(
            bucket=bucket,
            object=object
        ).execute()
    except errors.HttpError as ex:
        if ex.resp['status'] == '404':
            raise ValueError('Object Not Found')
        # Any other HTTP failure (permissions, server error, ...) must
        # surface to the caller instead of silently yielding None.
        raise

    crc32c = response['crc32c']
    self.log.info('The crc32c checksum of %s is %s', object, crc32c)
    return crc32c
|
||||
|
||||
def get_md5hash(self, bucket, object):
    """
    Gets the MD5 hash of an object in Google Cloud Storage.

    :param bucket: The Google cloud storage bucket where the object is.
    :type bucket: string
    :param object: The name of the object to check in the Google cloud
        storage bucket.
    :type object: string
    :return: The value of the ``md5Hash`` field of the object metadata
        returned by the API.
    :raises ValueError: If the API responds with HTTP status 404
        (the object does not exist).
    :raises errors.HttpError: For any other HTTP error returned by the
        API; these are propagated unchanged to the caller.
    """
    self.log.info('Retrieving the MD5 hash of '
                  'object: %s in bucket: %s', object, bucket)
    service = self.get_conn()
    # Only the API call itself is guarded, so unrelated failures (for
    # example a missing key in the response) are not mistaken for HTTP
    # errors.
    try:
        response = service.objects().get(
            bucket=bucket,
            object=object
        ).execute()
    except errors.HttpError as ex:
        if ex.resp['status'] == '404':
            raise ValueError('Object Not Found')
        # Any other HTTP failure (permissions, server error, ...) must
        # surface to the caller instead of silently yielding None.
        raise

    md5hash = response['md5Hash']
    self.log.info('The md5Hash of %s is %s', object, md5hash)
    return md5hash
|
||||
|
|
Загрузка…
Ссылка в новой задаче