class DataFileMetadata(TypedDict):
    """Metadata describing one data file
    """

    e_tag: str
    """ETag of the file"""

    sha1: SHA1Hash
    """SHA1 hash of the file"""
class S3Client(ClientBase):
    """S3 client for downloading data files and assets

    Remote metadata (ETag and SHA1) for each data file is cached in
    :attr:`metadata_file` so that unchanged files are not downloaded again.
    """
    data_dirs: DataFiles
    """Remote data directories"""
    data_files_local: DataFiles
    """Local data files"""
    data_files_remote: DataFiles
    """Remote data files"""
    metadata_file: Path
    """Filename to store cached metadata"""
    data_file_metadata: dict[DataFileType, DataFileMetadata|None]
    """Cached metadata for data files"""

    def __init__(self, app: web.Application) -> None:
        # import app key locally because app.cleanup_ctx has issues otherwise
        # NOTE(review): no local import is visible at this point, the comment
        # above may be stale -- confirm before removing it.
        self.app = app
        self.app_conf = app[APP_CONF_KEY]
        config = app[ConfigKey]
        super().__init__(config)
        data_dirs: DataFiles = {
            'clips': config.aws.clips_prefix,
            'legistar': config.aws.legistar_prefix,
            'legistar_rguid': config.aws.legistar_rguid_prefix,
        }
        s3_data_dir = self.app_conf.s3_data_dir
        assert s3_data_dir is not None
        data_files_local: DataFiles = {
            'clips': s3_data_dir / config.data_file,
            'legistar': s3_data_dir / config.legistar.data_file,
            'legistar_rguid': s3_data_dir / RGuidLegistarData._get_data_file(config),
        }
        # Each remote file lives in its remote directory under the same
        # file name as the local copy.
        data_files_remote: DataFiles = {
            k: data_dirs[k] / data_files_local[k].name
            for k in data_files_local
        }
        self.search_dir_local = s3_data_dir / '_search-index'
        self.search_dir_remote = config.aws.legistar_prefix / '_search-index'
        self.search_dir_local.mkdir(parents=True, exist_ok=True)
        self.data_dirs = data_dirs
        self.data_files_local = data_files_local
        self.data_files_remote = data_files_remote
        self.metadata_file = s3_data_dir / 's3metadata.json'
        self.data_file_metadata = self.load_data_file_metadata()

    async def get_search_index_dir(self) -> None:
        """Download the search index directory

        Every object found under the remote search index prefix (except keys
        ending in ``'/'``) is saved directly inside the local search index
        directory using the last path segment of its key.

        Existing files will be overwritten.
        """
        index_dir = self.search_dir_remote
        objs = [
            obj async for obj in self.iter_objects(index_dir)
            if not obj.key.endswith('/')
        ]
        # Every file is written directly into the local index directory, so
        # it only needs to be created once rather than per object.
        self.search_dir_local.mkdir(parents=True, exist_ok=True)
        for obj in objs:
            local_file = self.search_dir_local / obj.key.rsplit('/', 1)[-1]
            logger.debug(f'Found search index file "{obj.key}"')
            await self.download_object(obj.key, local_file)
            logger.info(f'Downloaded search index file "{obj.key}" to "{local_file}"')

    async def get_data_files(self) -> bool:
        """Download data files if they have changed remotely

        All data files are checked concurrently.  The metadata cache is
        written to disk only if at least one file was downloaded.

        Returns:
            ``True`` if any data file was downloaded, otherwise ``False``
        """
        coros = [self.download_data_file(key) for key in self.data_files_local]
        if not coros:
            return False
        results = await asyncio.gather(*coros)
        changed = any(results)
        if changed:
            self.save_data_file_metadata()
        return changed

    async def download_data_file(
        self,
        key: DataFileType,
        remote_metadata: DataFileMetadata|None = None
    ) -> bool:
        """Download a data file if it has changed remotely

        Arguments:
            key: The data file to check
            remote_metadata: Remote metadata for the file, if already known.
                When ``None`` it is requested with
                :meth:`get_data_file_remote_meta`

        Returns:
            ``True`` if the file was downloaded, ``False`` if the local copy
            is already up to date
        """
        local_file = self.data_files_local[key]
        remote_file = self.data_files_remote[key]
        if remote_metadata is None:
            remote_metadata = await self.get_data_file_remote_meta(key)
        # ``.get()`` so a cache lacking this key is treated as "never seen"
        cached_metadata = self.data_file_metadata.get(key)
        if cached_metadata is not None and cached_metadata == remote_metadata:
            if local_file.exists():
                logger.debug(f'Data for "{key}" is up to date')
                return False
            # The cache says the file is current but the local copy is gone
            # (deleted or moved), so fall through and fetch it again instead
            # of failing.
            logger.warning(f'Local data file for "{key}" is missing, downloading again')
        local_file.parent.mkdir(parents=True, exist_ok=True)
        await self.download_object(remote_file, local_file)
        logger.info(f'Downloaded data for "{key}" to "{local_file}"')
        self.data_file_metadata[key] = remote_metadata
        return True

    async def get_data_file_remote_meta(self, key: DataFileType) -> DataFileMetadata:
        """Get the remote metadata for a data file

        Arguments:
            key: The data file to look up

        Returns:
            The ETag and SHA1 hash of the remote object
        """
        remote_file = self.data_files_remote[key]
        obj = await self.get_object(remote_file)
        e_tag = await obj.e_tag
        sha1 = await self.get_object_sha1(remote_file)
        assert sha1 is not None
        return {
            'e_tag': e_tag,
            'sha1': sha1,
        }

    def load_data_file_metadata(self) -> dict[DataFileType, DataFileMetadata|None]:
        """Load the data file metadata from disk

        Every key of :attr:`data_files_local` is present in the result.  Keys
        without stored metadata (no cache file, an unreadable cache file, or
        a cache file written before the key existed) map to ``None``.
        """
        metadata: dict[DataFileType, DataFileMetadata|None] = {
            k: None for k in self.data_files_local
        }
        if not self.metadata_file.exists():
            return metadata
        try:
            stored = json.loads(self.metadata_file.read_text(encoding='utf-8'))
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass
            stored = None
        if not isinstance(stored, dict):
            # An unusable cache only costs a re-download, so don't fail here
            logger.warning(f'Ignoring invalid metadata file "{self.metadata_file}"')
            return metadata
        metadata.update(stored)
        return metadata

    def save_data_file_metadata(self) -> None:
        """Save the data file metadata to disk
        """
        serialized = json.dumps(self.data_file_metadata)
        self.metadata_file.write_text(serialized, encoding='utf-8')
S3ClientKey = web.AppKey(
    'S3Client',
    S3Client,
)
"""App key under which the :class:`.s3client.S3Client` instance is stored"""