Upgrade redact_cli_py to 0.3.2 (#1044)
* Upgrade redact_cli_py to 0.3.2 * Fix mailto typo, update document URL
This commit is contained in:
Родитель
2e5ef6f0ae
Коммит
801731cff2
|
@ -0,0 +1,3 @@
|
|||
[flake8]
|
||||
max-line-length = 88
|
||||
extend-ignore = E203, E501, PIE798
|
|
@ -6,6 +6,40 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.3.2] - 2022-08-11
|
||||
### Changed
|
||||
- Refactor code styles with flake8/black and their extensions.
|
||||
|
||||
## [0.3.1] - 2022-08-02
|
||||
### Added
|
||||
- Support for multi-page PDFs and TIFFs in the batch redact CLI (`batch_redact.py`).
|
||||
|
||||
## [0.3.0] - 2022-01-06
|
||||
### Added
|
||||
- Support for the FormRecognizer OCR result v3.0 format, while maintaining backward compatibility with v2.0 and v2.1.
|
||||
|
||||
### Changed
|
||||
- The default API version of OCR result redaction has changed from v2.x to v3.x schema.
|
||||
- You now need to specify which version of the OCR result you want to redact in `redact.py` and `batch_redact.py`.
|
||||
- Before:
|
||||
|
||||
``` bash
|
||||
python redact.py ocr <ocr_result_path> <fott_label_path> <output_path>
|
||||
python batch_redact.py <input_container> <input_folder_path> <output_container> <output_folder_path>
|
||||
```
|
||||
|
||||
- After:
|
||||
|
||||
``` bash
|
||||
python redact.py ocr <ocr_result_path> <fott_label_path> <output_path> <api_version>
|
||||
python batch_redact.py <input_container> <input_folder_path> <output_container> <output_folder_path> <api_version>
|
||||
```
|
||||
|
||||
Where API Version is one of the following:
|
||||
- v2.0
|
||||
- v2.1
|
||||
- v3.0
|
||||
|
||||
## [0.2.3] - 2021-12-13
|
||||
### Added
|
||||
- Support for redacting some Latin ligature letters and letters with diacritics.
|
||||
|
|
|
@ -10,6 +10,12 @@ shapely = "*"
|
|||
dacite = "*"
|
||||
azure-storage-blob = "*"
|
||||
pypdfium = "*"
|
||||
flake8 = "*"
|
||||
black = "*"
|
||||
flake8-bugbear = "*"
|
||||
flake8-pie = "*"
|
||||
pep8-naming = "*"
|
||||
flake8-black = "*"
|
||||
|
||||
[dev-packages]
|
||||
pytest = "*"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "6a2ce598371ced09c629f7844aa4c1172acbef11465108fc637f7e06958a1524"
|
||||
"sha256": "7afbe6fd0e14f4c0b98d8ee3aa9e90e49b1250c72d796d8144fae0067f787d2a"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
|
@ -16,105 +16,187 @@
|
|||
]
|
||||
},
|
||||
"default": {
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6",
|
||||
"sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"
|
||||
],
|
||||
"markers": "python_version >= '3.5'",
|
||||
"version": "==22.1.0"
|
||||
},
|
||||
"azure-core": {
|
||||
"hashes": [
|
||||
"sha256:25407390dde142d3e41ecf78bb18cedda9b7f7a0af558d082dec711c4a334f46",
|
||||
"sha256:906e031a8241fe0794ec4137aca77a1aeab2ebde5cd6049c377d05cb6b87b691"
|
||||
"sha256:0f3a20d245659bf81fb3670070a5410c8d4a43298d5a981e62dce393000a9084",
|
||||
"sha256:a76856fa83efe1925a4fd917dc179c7daa15917dd71da2774833fa82a96f3dfa"
|
||||
],
|
||||
"version": "==1.17.0"
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==1.24.2"
|
||||
},
|
||||
"azure-storage-blob": {
|
||||
"hashes": [
|
||||
"sha256:e74c2c49fd04b80225f5b9734f1dbd417d89f280abfedccced3ac21509e1659d",
|
||||
"sha256:eb37b50ddfb6e558b29f6c8c03b0666514e55d6170bf4624e7261a3af93c6401"
|
||||
"sha256:280a6ab032845bab9627582bee78a50497ca2f14772929b5c5ee8b4605af0cb3",
|
||||
"sha256:53f0d4cd32970ac9ff9b9753f83dd2fb3f9ac30e1d01e71638c436c509bfd884"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==12.8.1"
|
||||
"version": "==12.13.0"
|
||||
},
|
||||
"black": {
|
||||
"hashes": [
|
||||
"sha256:074458dc2f6e0d3dab7928d4417bb6957bb834434516f21514138437accdbe90",
|
||||
"sha256:187d96c5e713f441a5829e77120c269b6514418f4513a390b0499b0987f2ff1c",
|
||||
"sha256:2ea29072e954a4d55a2ff58971b83365eba5d3d357352a07a7a4df0d95f51c78",
|
||||
"sha256:4af5bc0e1f96be5ae9bd7aaec219c901a94d6caa2484c21983d043371c733fc4",
|
||||
"sha256:560558527e52ce8afba936fcce93a7411ab40c7d5fe8c2463e279e843c0328ee",
|
||||
"sha256:568ac3c465b1c8b34b61cd7a4e349e93f91abf0f9371eda1cf87194663ab684e",
|
||||
"sha256:6797f58943fceb1c461fb572edbe828d811e719c24e03375fd25170ada53825e",
|
||||
"sha256:6c1734ab264b8f7929cef8ae5f900b85d579e6cbfde09d7387da8f04771b51c6",
|
||||
"sha256:6c6d39e28aed379aec40da1c65434c77d75e65bb59a1e1c283de545fb4e7c6c9",
|
||||
"sha256:7ba9be198ecca5031cd78745780d65a3f75a34b2ff9be5837045dce55db83d1c",
|
||||
"sha256:94783f636bca89f11eb5d50437e8e17fbc6a929a628d82304c80fa9cd945f256",
|
||||
"sha256:a218d7e5856f91d20f04e931b6f16d15356db1c846ee55f01bac297a705ca24f",
|
||||
"sha256:a3db5b6409b96d9bd543323b23ef32a1a2b06416d525d27e0f67e74f1446c8f2",
|
||||
"sha256:ac609cf8ef5e7115ddd07d85d988d074ed00e10fbc3445aee393e70164a2219c",
|
||||
"sha256:b154e6bbde1e79ea3260c4b40c0b7b3109ffcdf7bc4ebf8859169a6af72cd70b",
|
||||
"sha256:b270a168d69edb8b7ed32c193ef10fd27844e5c60852039599f9184460ce0807",
|
||||
"sha256:b9fd45787ba8aa3f5e0a0a98920c1012c884622c6c920dbe98dbd05bc7c70fbf",
|
||||
"sha256:c85928b9d5f83b23cee7d0efcb310172412fbf7cb9d9ce963bd67fd141781def",
|
||||
"sha256:c9a3ac16efe9ec7d7381ddebcc022119794872abce99475345c5a61aa18c45ad",
|
||||
"sha256:cfaf3895a9634e882bf9d2363fed5af8888802d670f58b279b0bece00e9a872d",
|
||||
"sha256:e439798f819d49ba1c0bd9664427a05aab79bfba777a6db94fd4e56fae0cb849",
|
||||
"sha256:f586c26118bc6e714ec58c09df0157fe2d9ee195c764f630eb0d8e7ccce72e69",
|
||||
"sha256:f6fe02afde060bbeef044af7996f335fbe90b039ccf3f5eb8f16df8b20f77666"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==22.6.0"
|
||||
},
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee",
|
||||
"sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8"
|
||||
"sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d",
|
||||
"sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"
|
||||
],
|
||||
"version": "==2021.5.30"
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==2022.6.15"
|
||||
},
|
||||
"cffi": {
|
||||
"hashes": [
|
||||
"sha256:06c54a68935738d206570b20da5ef2b6b6d92b38ef3ec45c5422c0ebaf338d4d",
|
||||
"sha256:0c0591bee64e438883b0c92a7bed78f6290d40bf02e54c5bf0978eaf36061771",
|
||||
"sha256:19ca0dbdeda3b2615421d54bef8985f72af6e0c47082a8d26122adac81a95872",
|
||||
"sha256:22b9c3c320171c108e903d61a3723b51e37aaa8c81255b5e7ce102775bd01e2c",
|
||||
"sha256:26bb2549b72708c833f5abe62b756176022a7b9a7f689b571e74c8478ead51dc",
|
||||
"sha256:33791e8a2dc2953f28b8d8d300dde42dd929ac28f974c4b4c6272cb2955cb762",
|
||||
"sha256:3c8d896becff2fa653dc4438b54a5a25a971d1f4110b32bd3068db3722c80202",
|
||||
"sha256:4373612d59c404baeb7cbd788a18b2b2a8331abcc84c3ba40051fcd18b17a4d5",
|
||||
"sha256:487d63e1454627c8e47dd230025780e91869cfba4c753a74fda196a1f6ad6548",
|
||||
"sha256:48916e459c54c4a70e52745639f1db524542140433599e13911b2f329834276a",
|
||||
"sha256:4922cd707b25e623b902c86188aca466d3620892db76c0bdd7b99a3d5e61d35f",
|
||||
"sha256:55af55e32ae468e9946f741a5d51f9896da6b9bf0bbdd326843fec05c730eb20",
|
||||
"sha256:57e555a9feb4a8460415f1aac331a2dc833b1115284f7ded7278b54afc5bd218",
|
||||
"sha256:5d4b68e216fc65e9fe4f524c177b54964af043dde734807586cf5435af84045c",
|
||||
"sha256:64fda793737bc4037521d4899be780534b9aea552eb673b9833b01f945904c2e",
|
||||
"sha256:6d6169cb3c6c2ad50db5b868db6491a790300ade1ed5d1da29289d73bbe40b56",
|
||||
"sha256:7bcac9a2b4fdbed2c16fa5681356d7121ecabf041f18d97ed5b8e0dd38a80224",
|
||||
"sha256:80b06212075346b5546b0417b9f2bf467fea3bfe7352f781ffc05a8ab24ba14a",
|
||||
"sha256:818014c754cd3dba7229c0f5884396264d51ffb87ec86e927ef0be140bfdb0d2",
|
||||
"sha256:8eb687582ed7cd8c4bdbff3df6c0da443eb89c3c72e6e5dcdd9c81729712791a",
|
||||
"sha256:99f27fefe34c37ba9875f224a8f36e31d744d8083e00f520f133cab79ad5e819",
|
||||
"sha256:9f3e33c28cd39d1b655ed1ba7247133b6f7fc16fa16887b120c0c670e35ce346",
|
||||
"sha256:a8661b2ce9694ca01c529bfa204dbb144b275a31685a075ce123f12331be790b",
|
||||
"sha256:a9da7010cec5a12193d1af9872a00888f396aba3dc79186604a09ea3ee7c029e",
|
||||
"sha256:aedb15f0a5a5949ecb129a82b72b19df97bbbca024081ed2ef88bd5c0a610534",
|
||||
"sha256:b315d709717a99f4b27b59b021e6207c64620790ca3e0bde636a6c7f14618abb",
|
||||
"sha256:ba6f2b3f452e150945d58f4badd92310449876c4c954836cfb1803bdd7b422f0",
|
||||
"sha256:c33d18eb6e6bc36f09d793c0dc58b0211fccc6ae5149b808da4a62660678b156",
|
||||
"sha256:c9a875ce9d7fe32887784274dd533c57909b7b1dcadcc128a2ac21331a9765dd",
|
||||
"sha256:c9e005e9bd57bc987764c32a1bee4364c44fdc11a3cc20a40b93b444984f2b87",
|
||||
"sha256:d2ad4d668a5c0645d281dcd17aff2be3212bc109b33814bbb15c4939f44181cc",
|
||||
"sha256:d950695ae4381ecd856bcaf2b1e866720e4ab9a1498cba61c602e56630ca7195",
|
||||
"sha256:e22dcb48709fc51a7b58a927391b23ab37eb3737a98ac4338e2448bef8559b33",
|
||||
"sha256:e8c6a99be100371dbb046880e7a282152aa5d6127ae01783e37662ef73850d8f",
|
||||
"sha256:e9dc245e3ac69c92ee4c167fbdd7428ec1956d4e754223124991ef29eb57a09d",
|
||||
"sha256:eb687a11f0a7a1839719edd80f41e459cc5366857ecbed383ff376c4e3cc6afd",
|
||||
"sha256:eb9e2a346c5238a30a746893f23a9535e700f8192a68c07c0258e7ece6ff3728",
|
||||
"sha256:ed38b924ce794e505647f7c331b22a693bee1538fdf46b0222c4717b42f744e7",
|
||||
"sha256:f0010c6f9d1a4011e429109fda55a225921e3206e7f62a0c22a35344bfd13cca",
|
||||
"sha256:f0c5d1acbfca6ebdd6b1e3eded8d261affb6ddcf2186205518f1428b8569bb99",
|
||||
"sha256:f10afb1004f102c7868ebfe91c28f4a712227fe4cb24974350ace1f90e1febbf",
|
||||
"sha256:f174135f5609428cc6e1b9090f9268f5c8935fddb1b25ccb8255a2d50de6789e",
|
||||
"sha256:f3ebe6e73c319340830a9b2825d32eb6d8475c1dac020b4f0aa774ee3b898d1c",
|
||||
"sha256:f627688813d0a4140153ff532537fbe4afea5a3dffce1f9deb7f91f848a832b5",
|
||||
"sha256:fd4305f86f53dfd8cd3522269ed7fc34856a8ee3709a5e28b2836b2db9d4cd69"
|
||||
"sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5",
|
||||
"sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef",
|
||||
"sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104",
|
||||
"sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426",
|
||||
"sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405",
|
||||
"sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375",
|
||||
"sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a",
|
||||
"sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e",
|
||||
"sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc",
|
||||
"sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf",
|
||||
"sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185",
|
||||
"sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497",
|
||||
"sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3",
|
||||
"sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35",
|
||||
"sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c",
|
||||
"sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83",
|
||||
"sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21",
|
||||
"sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca",
|
||||
"sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984",
|
||||
"sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac",
|
||||
"sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd",
|
||||
"sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee",
|
||||
"sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a",
|
||||
"sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2",
|
||||
"sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192",
|
||||
"sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7",
|
||||
"sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585",
|
||||
"sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f",
|
||||
"sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e",
|
||||
"sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27",
|
||||
"sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b",
|
||||
"sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e",
|
||||
"sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e",
|
||||
"sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d",
|
||||
"sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c",
|
||||
"sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415",
|
||||
"sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82",
|
||||
"sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02",
|
||||
"sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314",
|
||||
"sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325",
|
||||
"sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c",
|
||||
"sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3",
|
||||
"sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914",
|
||||
"sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045",
|
||||
"sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d",
|
||||
"sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9",
|
||||
"sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5",
|
||||
"sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2",
|
||||
"sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c",
|
||||
"sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3",
|
||||
"sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2",
|
||||
"sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8",
|
||||
"sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d",
|
||||
"sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d",
|
||||
"sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9",
|
||||
"sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162",
|
||||
"sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76",
|
||||
"sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4",
|
||||
"sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e",
|
||||
"sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9",
|
||||
"sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6",
|
||||
"sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b",
|
||||
"sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01",
|
||||
"sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"
|
||||
],
|
||||
"version": "==1.14.6"
|
||||
"version": "==1.15.1"
|
||||
},
|
||||
"charset-normalizer": {
|
||||
"hashes": [
|
||||
"sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b",
|
||||
"sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3"
|
||||
"sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5",
|
||||
"sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413"
|
||||
],
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==2.0.4"
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==2.1.0"
|
||||
},
|
||||
"click": {
|
||||
"hashes": [
|
||||
"sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e",
|
||||
"sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==8.1.3"
|
||||
},
|
||||
"colorama": {
|
||||
"hashes": [
|
||||
"sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da",
|
||||
"sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"
|
||||
],
|
||||
"markers": "platform_system == 'Windows'",
|
||||
"version": "==0.4.5"
|
||||
},
|
||||
"cryptography": {
|
||||
"hashes": [
|
||||
"sha256:0f1212a66329c80d68aeeb39b8a16d54ef57071bf22ff4e521657b27372e327d",
|
||||
"sha256:1e056c28420c072c5e3cb36e2b23ee55e260cb04eee08f702e0edfec3fb51959",
|
||||
"sha256:240f5c21aef0b73f40bb9f78d2caff73186700bf1bc6b94285699aff98cc16c6",
|
||||
"sha256:26965837447f9c82f1855e0bc8bc4fb910240b6e0d16a664bb722df3b5b06873",
|
||||
"sha256:37340614f8a5d2fb9aeea67fd159bfe4f5f4ed535b1090ce8ec428b2f15a11f2",
|
||||
"sha256:3d10de8116d25649631977cb37da6cbdd2d6fa0e0281d014a5b7d337255ca713",
|
||||
"sha256:3d8427734c781ea5f1b41d6589c293089704d4759e34597dce91014ac125aad1",
|
||||
"sha256:7ec5d3b029f5fa2b179325908b9cd93db28ab7b85bb6c1db56b10e0b54235177",
|
||||
"sha256:8e56e16617872b0957d1c9742a3f94b43533447fd78321514abbe7db216aa250",
|
||||
"sha256:b01fd6f2737816cb1e08ed4807ae194404790eac7ad030b34f2ce72b332f5586",
|
||||
"sha256:bf40af59ca2465b24e54f671b2de2c59257ddc4f7e5706dbd6930e26823668d3",
|
||||
"sha256:de4e5f7f68220d92b7637fc99847475b59154b7a1b3868fb7385337af54ac9ca",
|
||||
"sha256:eb8cc2afe8b05acbd84a43905832ec78e7b3873fb124ca190f574dca7389a87d",
|
||||
"sha256:ee77aa129f481be46f8d92a1a7db57269a2f23052d5f2433b4621bb457081cc9"
|
||||
"sha256:190f82f3e87033821828f60787cfa42bff98404483577b591429ed99bed39d59",
|
||||
"sha256:2be53f9f5505673eeda5f2736bea736c40f051a739bfae2f92d18aed1eb54596",
|
||||
"sha256:30788e070800fec9bbcf9faa71ea6d8068f5136f60029759fd8c3efec3c9dcb3",
|
||||
"sha256:3d41b965b3380f10e4611dbae366f6dc3cefc7c9ac4e8842a806b9672ae9add5",
|
||||
"sha256:4c590ec31550a724ef893c50f9a97a0c14e9c851c85621c5650d699a7b88f7ab",
|
||||
"sha256:549153378611c0cca1042f20fd9c5030d37a72f634c9326e225c9f666d472884",
|
||||
"sha256:63f9c17c0e2474ccbebc9302ce2f07b55b3b3fcb211ded18a42d5764f5c10a82",
|
||||
"sha256:6bc95ed67b6741b2607298f9ea4932ff157e570ef456ef7ff0ef4884a134cc4b",
|
||||
"sha256:7099a8d55cd49b737ffc99c17de504f2257e3787e02abe6d1a6d136574873441",
|
||||
"sha256:75976c217f10d48a8b5a8de3d70c454c249e4b91851f6838a4e48b8f41eb71aa",
|
||||
"sha256:7bc997818309f56c0038a33b8da5c0bfbb3f1f067f315f9abd6fc07ad359398d",
|
||||
"sha256:80f49023dd13ba35f7c34072fa17f604d2f19bf0989f292cedf7ab5770b87a0b",
|
||||
"sha256:91ce48d35f4e3d3f1d83e29ef4a9267246e6a3be51864a5b7d2247d5086fa99a",
|
||||
"sha256:a958c52505c8adf0d3822703078580d2c0456dd1d27fabfb6f76fe63d2971cd6",
|
||||
"sha256:b62439d7cd1222f3da897e9a9fe53bbf5c104fff4d60893ad1355d4c14a24157",
|
||||
"sha256:b7f8dd0d4c1f21759695c05a5ec8536c12f31611541f8904083f3dc582604280",
|
||||
"sha256:d204833f3c8a33bbe11eda63a54b1aad7aa7456ed769a982f21ec599ba5fa282",
|
||||
"sha256:e007f052ed10cc316df59bc90fbb7ff7950d7e2919c9757fd42a2b8ecf8a5f67",
|
||||
"sha256:f2dcb0b3b63afb6df7fd94ec6fbddac81b5492513f7b0436210d390c14d46ee8",
|
||||
"sha256:f721d1885ecae9078c3f6bbe8a88bc0786b6e749bf32ccec1ef2b18929a05046",
|
||||
"sha256:f7a6de3e98771e183645181b3627e2563dcde3ce94a9e42a3f427d2255190327",
|
||||
"sha256:f8c0a6e9e1dd3eb0414ba320f85da6b0dcbd543126e30fcc546e7372a7fbf3b9"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.4.7"
|
||||
"version": "==37.0.4"
|
||||
},
|
||||
"dacite": {
|
||||
"hashes": [
|
||||
|
@ -124,96 +206,202 @@
|
|||
"index": "pypi",
|
||||
"version": "==1.6.0"
|
||||
},
|
||||
"flake8": {
|
||||
"hashes": [
|
||||
"sha256:93aa565ae2f0316b95bb57a354f2b2d55ee8508e1fe1cb13b77b9c195b4a2537",
|
||||
"sha256:b27fd7faa8d90aaae763664a489012292990388e5d3604f383b290caefbbc922"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==5.0.3"
|
||||
},
|
||||
"flake8-black": {
|
||||
"hashes": [
|
||||
"sha256:7d667d0059fd1aa468de1669d77cc934b7f1feeac258d57bdae69a8e73c4cd90",
|
||||
"sha256:8211f5e20e954cb57c709acccf2f3281ce27016d4c4b989c3e51f878bb7ce12a"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.3.3"
|
||||
},
|
||||
"flake8-bugbear": {
|
||||
"hashes": [
|
||||
"sha256:db5d7a831ef4412a224b26c708967ff816818cabae415e76b8c58df156c4b8e5",
|
||||
"sha256:e450976a07e4f9d6c043d4f72b17ec1baf717fe37f7997009c8ae58064f88305"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==22.7.1"
|
||||
},
|
||||
"flake8-pie": {
|
||||
"hashes": [
|
||||
"sha256:47fd9d232b419f8db7a6465dee95cc24b385b1b8bdfd62b65250d70eaa06fc89",
|
||||
"sha256:a2d1e67a374d925f688300e9d0e202d1827a0d91e0a11114f712beee639bdc7c"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.15.0"
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a",
|
||||
"sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"
|
||||
"sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
|
||||
"sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
|
||||
],
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==3.2"
|
||||
"markers": "python_version >= '3.5'",
|
||||
"version": "==3.3"
|
||||
},
|
||||
"isodate": {
|
||||
"hashes": [
|
||||
"sha256:2e364a3d5759479cdb2d37cce6b9376ea504db2ff90252a2e5b7cc89cc9ff2d8",
|
||||
"sha256:aa4d33c06640f5352aca96e4b81afd8ab3b47337cc12089822d6f322ac772c81"
|
||||
"sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96",
|
||||
"sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"
|
||||
],
|
||||
"version": "==0.6.0"
|
||||
"version": "==0.6.1"
|
||||
},
|
||||
"jsonpointer": {
|
||||
"hashes": [
|
||||
"sha256:150f80c5badd02c757da6644852f612f88e8b4bc2f9852dcbf557c8738919686",
|
||||
"sha256:5a34b698db1eb79ceac454159d3f7c12a451a91f6334a4f638454327b7a89962"
|
||||
"sha256:51801e558539b4e9cd268638c078c6c5746c9ac96bc38152d443400e4f3793e9",
|
||||
"sha256:97cba51526c829282218feb99dab1b1e6bdf8efd1c43dc9d57be093c0d69c99a"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.1"
|
||||
"version": "==2.3"
|
||||
},
|
||||
"mccabe": {
|
||||
"hashes": [
|
||||
"sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325",
|
||||
"sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==0.7.0"
|
||||
},
|
||||
"msrest": {
|
||||
"hashes": [
|
||||
"sha256:72661bc7bedc2dc2040e8f170b6e9ef226ee6d3892e01affd4d26b06474d68d8",
|
||||
"sha256:c840511c845330e96886011a236440fafc2c9aff7b2df9c0a92041ee2dee3782"
|
||||
"sha256:21120a810e1233e5e6cc7fe40b474eeb4ec6f757a15d7cf86702c369f9567c32",
|
||||
"sha256:6e7661f46f3afd88b75667b7187a92829924446c7ea1d169be8c4bb7eeb788b9"
|
||||
],
|
||||
"version": "==0.6.21"
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==0.7.1"
|
||||
},
|
||||
"mypy-extensions": {
|
||||
"hashes": [
|
||||
"sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
|
||||
"sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
|
||||
],
|
||||
"version": "==0.4.3"
|
||||
},
|
||||
"oauthlib": {
|
||||
"hashes": [
|
||||
"sha256:42bf6354c2ed8c6acb54d971fce6f88193d97297e18602a3a886603f9d7730cc",
|
||||
"sha256:8f0215fcc533dd8dd1bee6f4c412d4f0cd7297307d43ac61666389e3bc3198a3"
|
||||
"sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2",
|
||||
"sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.1.1"
|
||||
"version": "==3.2.0"
|
||||
},
|
||||
"pathspec": {
|
||||
"hashes": [
|
||||
"sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a",
|
||||
"sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"
|
||||
],
|
||||
"version": "==0.9.0"
|
||||
},
|
||||
"pep8-naming": {
|
||||
"hashes": [
|
||||
"sha256:3af77cdaa9c7965f7c85a56cd579354553c9bbd3fdf3078a776f12db54dd6944",
|
||||
"sha256:f7867c1a464fe769be4f972ef7b79d6df1d9aff1b1f04ecf738d471963d3ab9c"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.13.1"
|
||||
},
|
||||
"pillow": {
|
||||
"hashes": [
|
||||
"sha256:0b2efa07f69dc395d95bb9ef3299f4ca29bcb2157dc615bae0b42c3c20668ffc",
|
||||
"sha256:114f816e4f73f9ec06997b2fde81a92cbf0777c9e8f462005550eed6bae57e63",
|
||||
"sha256:147bd9e71fb9dcf08357b4d530b5167941e222a6fd21f869c7911bac40b9994d",
|
||||
"sha256:15a2808e269a1cf2131930183dcc0419bc77bb73eb54285dde2706ac9939fa8e",
|
||||
"sha256:196560dba4da7a72c5e7085fccc5938ab4075fd37fe8b5468869724109812edd",
|
||||
"sha256:1c03e24be975e2afe70dfc5da6f187eea0b49a68bb2b69db0f30a61b7031cee4",
|
||||
"sha256:1fd5066cd343b5db88c048d971994e56b296868766e461b82fa4e22498f34d77",
|
||||
"sha256:29c9569049d04aaacd690573a0398dbd8e0bf0255684fee512b413c2142ab723",
|
||||
"sha256:2b6dfa068a8b6137da34a4936f5a816aba0ecc967af2feeb32c4393ddd671cba",
|
||||
"sha256:2cac53839bfc5cece8fdbe7f084d5e3ee61e1303cccc86511d351adcb9e2c792",
|
||||
"sha256:2ee77c14a0299d0541d26f3d8500bb57e081233e3fa915fa35abd02c51fa7fae",
|
||||
"sha256:37730f6e68bdc6a3f02d2079c34c532330d206429f3cee651aab6b66839a9f0e",
|
||||
"sha256:3f08bd8d785204149b5b33e3b5f0ebbfe2190ea58d1a051c578e29e39bfd2367",
|
||||
"sha256:479ab11cbd69612acefa8286481f65c5dece2002ffaa4f9db62682379ca3bb77",
|
||||
"sha256:4bc3c7ef940eeb200ca65bd83005eb3aae8083d47e8fcbf5f0943baa50726856",
|
||||
"sha256:660a87085925c61a0dcc80efb967512ac34dbb256ff7dd2b9b4ee8dbdab58cf4",
|
||||
"sha256:67b3666b544b953a2777cb3f5a922e991be73ab32635666ee72e05876b8a92de",
|
||||
"sha256:70af7d222df0ff81a2da601fab42decb009dc721545ed78549cb96e3a1c5f0c8",
|
||||
"sha256:75e09042a3b39e0ea61ce37e941221313d51a9c26b8e54e12b3ececccb71718a",
|
||||
"sha256:8960a8a9f4598974e4c2aeb1bff9bdd5db03ee65fd1fce8adf3223721aa2a636",
|
||||
"sha256:9364c81b252d8348e9cc0cb63e856b8f7c1b340caba6ee7a7a65c968312f7dab",
|
||||
"sha256:969cc558cca859cadf24f890fc009e1bce7d7d0386ba7c0478641a60199adf79",
|
||||
"sha256:9a211b663cf2314edbdb4cf897beeb5c9ee3810d1d53f0e423f06d6ebbf9cd5d",
|
||||
"sha256:a17ca41f45cf78c2216ebfab03add7cc350c305c38ff34ef4eef66b7d76c5229",
|
||||
"sha256:a2f381932dca2cf775811a008aa3027671ace723b7a38838045b1aee8669fdcf",
|
||||
"sha256:a4eef1ff2d62676deabf076f963eda4da34b51bc0517c70239fafed1d5b51500",
|
||||
"sha256:c088a000dfdd88c184cc7271bfac8c5b82d9efa8637cd2b68183771e3cf56f04",
|
||||
"sha256:c0e0550a404c69aab1e04ae89cca3e2a042b56ab043f7f729d984bf73ed2a093",
|
||||
"sha256:c11003197f908878164f0e6da15fce22373ac3fc320cda8c9d16e6bba105b844",
|
||||
"sha256:c2a5ff58751670292b406b9f06e07ed1446a4b13ffced6b6cab75b857485cbc8",
|
||||
"sha256:c35d09db702f4185ba22bb33ef1751ad49c266534339a5cebeb5159d364f6f82",
|
||||
"sha256:c379425c2707078dfb6bfad2430728831d399dc95a7deeb92015eb4c92345eaf",
|
||||
"sha256:cc866706d56bd3a7dbf8bac8660c6f6462f2f2b8a49add2ba617bc0c54473d83",
|
||||
"sha256:d0da39795049a9afcaadec532e7b669b5ebbb2a9134576ebcc15dd5bdae33cc0",
|
||||
"sha256:f156d6ecfc747ee111c167f8faf5f4953761b5e66e91a4e6767e548d0f80129c",
|
||||
"sha256:f4ebde71785f8bceb39dcd1e7f06bcc5d5c3cf48b9f69ab52636309387b097c8",
|
||||
"sha256:fc214a6b75d2e0ea7745488da7da3c381f41790812988c7a92345978414fad37",
|
||||
"sha256:fd7eef578f5b2200d066db1b50c4aa66410786201669fb76d5238b007918fb24",
|
||||
"sha256:ff04c373477723430dce2e9d024c708a047d44cf17166bf16e604b379bf0ca14"
|
||||
"sha256:0030fdbd926fb85844b8b92e2f9449ba89607231d3dd597a21ae72dc7fe26927",
|
||||
"sha256:030e3460861488e249731c3e7ab59b07c7853838ff3b8e16aac9561bb345da14",
|
||||
"sha256:0ed2c4ef2451de908c90436d6e8092e13a43992f1860275b4d8082667fbb2ffc",
|
||||
"sha256:136659638f61a251e8ed3b331fc6ccd124590eeff539de57c5f80ef3a9594e58",
|
||||
"sha256:13b725463f32df1bfeacbf3dd197fb358ae8ebcd8c5548faa75126ea425ccb60",
|
||||
"sha256:1536ad017a9f789430fb6b8be8bf99d2f214c76502becc196c6f2d9a75b01b76",
|
||||
"sha256:15928f824870535c85dbf949c09d6ae7d3d6ac2d6efec80f3227f73eefba741c",
|
||||
"sha256:17d4cafe22f050b46d983b71c707162d63d796a1235cdf8b9d7a112e97b15bac",
|
||||
"sha256:1802f34298f5ba11d55e5bb09c31997dc0c6aed919658dfdf0198a2fe75d5490",
|
||||
"sha256:1cc1d2451e8a3b4bfdb9caf745b58e6c7a77d2e469159b0d527a4554d73694d1",
|
||||
"sha256:1fd6f5e3c0e4697fa7eb45b6e93996299f3feee73a3175fa451f49a74d092b9f",
|
||||
"sha256:254164c57bab4b459f14c64e93df11eff5ded575192c294a0c49270f22c5d93d",
|
||||
"sha256:2ad0d4df0f5ef2247e27fc790d5c9b5a0af8ade9ba340db4a73bb1a4a3e5fb4f",
|
||||
"sha256:2c58b24e3a63efd22554c676d81b0e57f80e0a7d3a5874a7e14ce90ec40d3069",
|
||||
"sha256:2d33a11f601213dcd5718109c09a52c2a1c893e7461f0be2d6febc2879ec2402",
|
||||
"sha256:337a74fd2f291c607d220c793a8135273c4c2ab001b03e601c36766005f36885",
|
||||
"sha256:37ff6b522a26d0538b753f0b4e8e164fdada12db6c6f00f62145d732d8a3152e",
|
||||
"sha256:3d1f14f5f691f55e1b47f824ca4fdcb4b19b4323fe43cc7bb105988cad7496be",
|
||||
"sha256:408673ed75594933714482501fe97e055a42996087eeca7e5d06e33218d05aa8",
|
||||
"sha256:4134d3f1ba5f15027ff5c04296f13328fecd46921424084516bdb1b2548e66ff",
|
||||
"sha256:4ad2f835e0ad81d1689f1b7e3fbac7b01bb8777d5a985c8962bedee0cc6d43da",
|
||||
"sha256:50dff9cc21826d2977ef2d2a205504034e3a4563ca6f5db739b0d1026658e004",
|
||||
"sha256:510cef4a3f401c246cfd8227b300828715dd055463cdca6176c2e4036df8bd4f",
|
||||
"sha256:5aed7dde98403cd91d86a1115c78d8145c83078e864c1de1064f52e6feb61b20",
|
||||
"sha256:69bd1a15d7ba3694631e00df8de65a8cb031911ca11f44929c97fe05eb9b6c1d",
|
||||
"sha256:6bf088c1ce160f50ea40764f825ec9b72ed9da25346216b91361eef8ad1b8f8c",
|
||||
"sha256:6e8c66f70fb539301e064f6478d7453e820d8a2c631da948a23384865cd95544",
|
||||
"sha256:727dd1389bc5cb9827cbd1f9d40d2c2a1a0c9b32dd2261db522d22a604a6eec9",
|
||||
"sha256:74a04183e6e64930b667d321524e3c5361094bb4af9083db5c301db64cd341f3",
|
||||
"sha256:75e636fd3e0fb872693f23ccb8a5ff2cd578801251f3a4f6854c6a5d437d3c04",
|
||||
"sha256:7761afe0126d046974a01e030ae7529ed0ca6a196de3ec6937c11df0df1bc91c",
|
||||
"sha256:7888310f6214f19ab2b6df90f3f06afa3df7ef7355fc025e78a3044737fab1f5",
|
||||
"sha256:7b0554af24df2bf96618dac71ddada02420f946be943b181108cac55a7a2dcd4",
|
||||
"sha256:7c7b502bc34f6e32ba022b4a209638f9e097d7a9098104ae420eb8186217ebbb",
|
||||
"sha256:808add66ea764ed97d44dda1ac4f2cfec4c1867d9efb16a33d158be79f32b8a4",
|
||||
"sha256:831e648102c82f152e14c1a0938689dbb22480c548c8d4b8b248b3e50967b88c",
|
||||
"sha256:93689632949aff41199090eff5474f3990b6823404e45d66a5d44304e9cdc467",
|
||||
"sha256:96b5e6874431df16aee0c1ba237574cb6dff1dcb173798faa6a9d8b399a05d0e",
|
||||
"sha256:9a54614049a18a2d6fe156e68e188da02a046a4a93cf24f373bffd977e943421",
|
||||
"sha256:a138441e95562b3c078746a22f8fca8ff1c22c014f856278bdbdd89ca36cff1b",
|
||||
"sha256:a647c0d4478b995c5e54615a2e5360ccedd2f85e70ab57fbe817ca613d5e63b8",
|
||||
"sha256:a9c9bc489f8ab30906d7a85afac4b4944a572a7432e00698a7239f44a44e6efb",
|
||||
"sha256:ad2277b185ebce47a63f4dc6302e30f05762b688f8dc3de55dbae4651872cdf3",
|
||||
"sha256:b6d5e92df2b77665e07ddb2e4dbd6d644b78e4c0d2e9272a852627cdba0d75cf",
|
||||
"sha256:bc431b065722a5ad1dfb4df354fb9333b7a582a5ee39a90e6ffff688d72f27a1",
|
||||
"sha256:bdd0de2d64688ecae88dd8935012c4a72681e5df632af903a1dca8c5e7aa871a",
|
||||
"sha256:c79698d4cd9318d9481d89a77e2d3fcaeff5486be641e60a4b49f3d2ecca4e28",
|
||||
"sha256:cb6259196a589123d755380b65127ddc60f4c64b21fc3bb46ce3a6ea663659b0",
|
||||
"sha256:d5b87da55a08acb586bad5c3aa3b86505f559b84f39035b233d5bf844b0834b1",
|
||||
"sha256:dcd7b9c7139dc8258d164b55696ecd16c04607f1cc33ba7af86613881ffe4ac8",
|
||||
"sha256:dfe4c1fedfde4e2fbc009d5ad420647f7730d719786388b7de0999bf32c0d9fd",
|
||||
"sha256:ea98f633d45f7e815db648fd7ff0f19e328302ac36427343e4432c84432e7ff4",
|
||||
"sha256:ec52c351b35ca269cb1f8069d610fc45c5bd38c3e91f9ab4cbbf0aebc136d9c8",
|
||||
"sha256:eef7592281f7c174d3d6cbfbb7ee5984a671fcd77e3fc78e973d492e9bf0eb3f",
|
||||
"sha256:f07f1f00e22b231dd3d9b9208692042e29792d6bd4f6639415d2f23158a80013",
|
||||
"sha256:f3fac744f9b540148fa7715a435d2283b71f68bfb6d4aae24482a890aed18b59",
|
||||
"sha256:fa768eff5f9f958270b081bb33581b4b569faabf8774726b283edb06617101dc",
|
||||
"sha256:fac2d65901fb0fdf20363fbd345c01958a742f2dc62a8dd4495af66e3ff502a4"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==8.3.1"
|
||||
"version": "==9.2.0"
|
||||
},
|
||||
"platformdirs": {
|
||||
"hashes": [
|
||||
"sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788",
|
||||
"sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.5.2"
|
||||
},
|
||||
"pycodestyle": {
|
||||
"hashes": [
|
||||
"sha256:289cdc0969d589d90752582bef6dff57c5fbc6949ee8b013ad6d6449a8ae9437",
|
||||
"sha256:beaba44501f89d785be791c9462553f06958a221d166c64e1f107320f839acc2"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==2.9.0"
|
||||
},
|
||||
"pycparser": {
|
||||
"hashes": [
|
||||
"sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0",
|
||||
"sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"
|
||||
"sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9",
|
||||
"sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==2.20"
|
||||
"version": "==2.21"
|
||||
},
|
||||
"pyflakes": {
|
||||
"hashes": [
|
||||
"sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2",
|
||||
"sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==2.5.0"
|
||||
},
|
||||
"pypdfium": {
|
||||
"hashes": [
|
||||
|
@ -224,48 +412,59 @@
|
|||
},
|
||||
"requests": {
|
||||
"hashes": [
|
||||
"sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24",
|
||||
"sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"
|
||||
"sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983",
|
||||
"sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
|
||||
"version": "==2.26.0"
|
||||
"markers": "python_version >= '3.7' and python_version < '4'",
|
||||
"version": "==2.28.1"
|
||||
},
|
||||
"requests-oauthlib": {
|
||||
"hashes": [
|
||||
"sha256:7f71572defaecd16372f9006f33c2ec8c077c3cfa6f5911a9a90202beb513f3d",
|
||||
"sha256:b4261601a71fd721a8bd6d7aa1cc1d6a8a93b4a9f5e96626f8e4d91e8beeaa6a",
|
||||
"sha256:fa6c47b933f01060936d87ae9327fead68768b69c6c9ea2109c48be30f2d4dbc"
|
||||
"sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5",
|
||||
"sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"
|
||||
],
|
||||
"version": "==1.3.0"
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==1.3.1"
|
||||
},
|
||||
"shapely": {
|
||||
"hashes": [
|
||||
"sha256:052eb5b9ba756808a7825e8a8020fb146ec489dd5c919e7d139014775411e688",
|
||||
"sha256:1641724c1055459a7e2b8bbe47ba25bdc89554582e62aec23cb3f3ca25f9b129",
|
||||
"sha256:17df66e87d0fe0193910aeaa938c99f0b04f67b430edb8adae01e7be557b141b",
|
||||
"sha256:182716ffb500d114b5d1b75d7fd9d14b7d3414cef3c38c0490534cc9ce20981a",
|
||||
"sha256:2df5260d0f2983309776cb41bfa85c464ec07018d88c0ecfca23d40bfadae2f1",
|
||||
"sha256:35be1c5d869966569d3dfd4ec31832d7c780e9df760e1fe52131105685941891",
|
||||
"sha256:46da0ea527da9cf9503e66c18bab6981c5556859e518fe71578b47126e54ca93",
|
||||
"sha256:4c10f317e379cc404f8fc510cd9982d5d3e7ba13a9cfd39aa251d894c6366798",
|
||||
"sha256:4f3c59f6dbf86a9fc293546de492f5e07344e045f9333f3a753f2dda903c45d1",
|
||||
"sha256:60e5b2282619249dbe8dc5266d781cc7d7fb1b27fa49f8241f2167672ad26719",
|
||||
"sha256:617bf046a6861d7c6b44d2d9cb9e2311548638e684c2cd071d8945f24a926263",
|
||||
"sha256:6593026cd3f5daaea12bcc51ae5c979318070fefee210e7990cb8ac2364e79a1",
|
||||
"sha256:6871acba8fbe744efa4f9f34e726d070bfbf9bffb356a8f6d64557846324232b",
|
||||
"sha256:791477edb422692e7dc351c5ed6530eb0e949a31b45569946619a0d9cd5f53cb",
|
||||
"sha256:8e7659dd994792a0aad8fb80439f59055a21163e236faf2f9823beb63a380e19",
|
||||
"sha256:8f15b6ce67dcc05b61f19c689b60f3fe58550ba994290ff8332f711f5aaa9840",
|
||||
"sha256:90a3e2ae0d6d7d50ff2370ba168fbd416a53e7d8448410758c5d6a5920646c1d",
|
||||
"sha256:a3774516c8a83abfd1ddffb8b6ec1b0935d7fe6ea0ff5c31a18bfdae567b4eba",
|
||||
"sha256:a5c3a50d823c192f32615a2a6920e8c046b09e07a58eba220407335a9cd2e8ea",
|
||||
"sha256:b40cc7bb089ae4aa9ddba1db900b4cd1bce3925d2a4b5837b639e49de054784f",
|
||||
"sha256:da38ed3d65b8091447dc3717e5218cc336d20303b77b0634b261bc5c1aa2bae8",
|
||||
"sha256:de618e67b64a51a0768d26a9963ecd7d338a2cf6e9e7582d2385f88ad005b3d1",
|
||||
"sha256:e3afccf0437edc108eef1e2bb9cc4c7073e7705924eb4cd0bf7715cd1ef0ce1b"
|
||||
"sha256:0c0fd457ce477b1dced507a72f1e2084c9191bfcb8a1e09886990ebd02acf024",
|
||||
"sha256:137f1369630408024a62ff79a437a5657e6c5b76b9cd352dde704b425acdb298",
|
||||
"sha256:15a856fbb588ad5d042784e00918c662902776452008c771ecba2ff615cd197a",
|
||||
"sha256:1d95842cc6bbbeab673061b63e70b07be9a375c15a60f4098f8fbd29f43af1b4",
|
||||
"sha256:256bdf8080bb7bb504d47b2c76919ecebab9708cc1b26266b3ec32b42448f642",
|
||||
"sha256:2e02da2e988e74d61f15c720f9f613fab51942aae2dfeacdcb78eadece00e1f3",
|
||||
"sha256:3423299254deec075e79fb7dc7909d702104e4167149de7f45510c3a6342eeea",
|
||||
"sha256:3a40bf497b57a6625b83996aed10ce2233bca0e5471b8af771b186d681433ac5",
|
||||
"sha256:44d2832c1b706bf43101fda92831a083467cc4b4923a7ed17319ab599c1025d8",
|
||||
"sha256:5254240eefc44139ab0d128faf671635d8bdd9c23955ee063d4d6b8f20073ae0",
|
||||
"sha256:56413f7d32c70b63f239eb0865b24c0c61029e38757de456cc4ab3c416559a0b",
|
||||
"sha256:572af9d5006fd5e3213e37ee548912b0341fb26724d6dc8a4e3950c10197ebb6",
|
||||
"sha256:62056e64b12b6d483d79f8e34bf058d2fe734d51c9227c1713705399434eff3b",
|
||||
"sha256:68c8e18dc9dc8a198c3addc8c9596f64137101f566f04b96ecfca0b214cb8b12",
|
||||
"sha256:6bdc7728f1e5df430d8c588661f79f1eed4a2728c8b689e12707cfec217f68f8",
|
||||
"sha256:6fcb28836ae93809de1dde73c03c9c24bab0ba2b2bf419ddb2aeb72c96d110e9",
|
||||
"sha256:75042e8039c79dd01f102bb288beace9dc2f49fc44a2dea875f9b697aa8cd30d",
|
||||
"sha256:78966332a89813b237de357a03f612fd451a871fe6e26c12b6b71645fe8eee39",
|
||||
"sha256:7c8eda45085ccdd7f9805ea4a93fdd5eb0b6039a61d5f0cefb960487e6dc17a1",
|
||||
"sha256:7c9e3400b716c51ba43eea1678c28272580114e009b6c78cdd00c44df3e325fa",
|
||||
"sha256:840be3f27a1152851c54b968f2e12d718c9f13b7acd51c482e58a70f60f29e31",
|
||||
"sha256:8e3ed52a081da58eb4a885c157c594876633dbd4eb283f13ba5bf39c82322d76",
|
||||
"sha256:8fe641f1f61b3d43dd61b5a85d2ef023e6e19bf8f204a5160a1cb1ec645cbc09",
|
||||
"sha256:a58e1f362f2091743e5e13212f5d5d16251a4bb63dd0ed587c652d3be9620d3a",
|
||||
"sha256:a60861b5ca2c488ebcdc706eca94d325c26d1567921c74acc83df5e6913590c7",
|
||||
"sha256:beee3949ddf381735049cfa6532fb234d5d20a5be910c4f2fb7c7295fd7960e3",
|
||||
"sha256:c0a0d7752b145343838bd36ed09382d85f5befe426832d7384c5b051c147acbd",
|
||||
"sha256:c60f3758212ec480675b820b13035dda8af8f7cc560d2cc67999b2717fb8faef",
|
||||
"sha256:ce0b5c5f7acbccf98b3460eecaa40e9b18272b2a734f74fcddf1d7696e047e95",
|
||||
"sha256:cec89a5617c0137f4678282e983c3d63bf838fb00cdf318cc555b4d8409f7130",
|
||||
"sha256:d3f3fac625690f01f35af665649e993f15f924e740b5c0ac0376900655815521",
|
||||
"sha256:d74de394684d66e25e780b0359fda85be7766af85940fa2dfad728b1a815c71f",
|
||||
"sha256:e07b0bd2a0e61a8afd4d1c1bd23f3550b711f01274ffb53de99358fd781eefd8",
|
||||
"sha256:f12695662c3ad1e6031b3de98f191963d0f09de6d1a4988acd907405644032ba"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.7.1"
|
||||
"version": "==1.8.2"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
|
@ -275,39 +474,54 @@
|
|||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==1.16.0"
|
||||
},
|
||||
"tomli": {
|
||||
"hashes": [
|
||||
"sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
|
||||
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
|
||||
],
|
||||
"markers": "python_full_version < '3.11.0a7'",
|
||||
"version": "==2.0.1"
|
||||
},
|
||||
"typing-extensions": {
|
||||
"hashes": [
|
||||
"sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02",
|
||||
"sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==4.3.0"
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
|
||||
"sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
|
||||
"sha256:c33ccba33c819596124764c23a97d25f32b28433ba0dedeb77d873a38722c9bc",
|
||||
"sha256:ea6e8fb210b19d950fab93b60c9009226c63a28808bc8386e05301e25883ac0a"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
|
||||
"version": "==1.26.6"
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'",
|
||||
"version": "==1.26.11"
|
||||
}
|
||||
},
|
||||
"develop": {
|
||||
"atomicwrites": {
|
||||
"hashes": [
|
||||
"sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197",
|
||||
"sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"
|
||||
"sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"
|
||||
],
|
||||
"markers": "sys_platform == 'win32'",
|
||||
"version": "==1.4.0"
|
||||
"version": "==1.4.1"
|
||||
},
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1",
|
||||
"sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"
|
||||
"sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6",
|
||||
"sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==21.2.0"
|
||||
"markers": "python_version >= '3.5'",
|
||||
"version": "==22.1.0"
|
||||
},
|
||||
"colorama": {
|
||||
"hashes": [
|
||||
"sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b",
|
||||
"sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"
|
||||
"sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da",
|
||||
"sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"
|
||||
],
|
||||
"markers": "sys_platform == 'win32'",
|
||||
"version": "==0.4.4"
|
||||
"markers": "platform_system == 'Windows'",
|
||||
"version": "==0.4.5"
|
||||
},
|
||||
"iniconfig": {
|
||||
"hashes": [
|
||||
|
@ -318,51 +532,51 @@
|
|||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7",
|
||||
"sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14"
|
||||
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
|
||||
"sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==21.0"
|
||||
"version": "==21.3"
|
||||
},
|
||||
"pluggy": {
|
||||
"hashes": [
|
||||
"sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
|
||||
"sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
|
||||
"sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159",
|
||||
"sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==0.13.1"
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==1.0.0"
|
||||
},
|
||||
"py": {
|
||||
"hashes": [
|
||||
"sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3",
|
||||
"sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"
|
||||
"sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719",
|
||||
"sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==1.10.0"
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==1.11.0"
|
||||
},
|
||||
"pyparsing": {
|
||||
"hashes": [
|
||||
"sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
|
||||
"sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
|
||||
"sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb",
|
||||
"sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"
|
||||
],
|
||||
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==2.4.7"
|
||||
"markers": "python_full_version >= '3.6.8'",
|
||||
"version": "==3.0.9"
|
||||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
"sha256:50bcad0a0b9c5a72c8e4e7c9855a3ad496ca6a881a3641b4260605450772c54b",
|
||||
"sha256:91ef2131a9bd6be8f76f1f08eac5c5317221d6ad1e143ae03894b862e8976890"
|
||||
"sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c",
|
||||
"sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==6.2.4"
|
||||
"version": "==7.1.2"
|
||||
},
|
||||
"toml": {
|
||||
"tomli": {
|
||||
"hashes": [
|
||||
"sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
|
||||
"sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"
|
||||
"sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
|
||||
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
|
||||
],
|
||||
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==0.10.2"
|
||||
"markers": "python_full_version < '3.11.0a7'",
|
||||
"version": "==2.0.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,11 +10,11 @@ The OCR.json and labels.json will also be redacted while keeping the semantics o
|
|||
![ocr-before-after-redaction](./images/ocr-before-after-redaction.png)
|
||||
![labels-before-after-redaction](./images/labels-before-after-redaction.png)
|
||||
|
||||
## Language support
|
||||
## Language Support
|
||||
This tool supports redaction of Latin characters only. For support of non-Latin documents, please [contact us](mailto:formrecog_contact@microsoft.com?subject=Redaction%20tool%20language%20support).
|
||||
|
||||
## Version
|
||||
Redact CLI 0.2.3
|
||||
Redact CLI 0.3.2
|
||||
|
||||
## Setup Environment
|
||||
|
||||
|
@ -103,7 +103,21 @@ python redact.py image <image_path> <fott_label_path> <output_path>
|
|||
### Redact OCR Result
|
||||
|
||||
``` bash
|
||||
python redact.py ocr <ocr_result_path> <fott_label_path> <output_path>
|
||||
python redact.py ocr <ocr_result_path> <fott_label_path> <output_path> <api_version>
|
||||
```
|
||||
|
||||
#### API Version
|
||||
|
||||
In Azure Form Recognizer, the OCR result has a different schema for each API version. To redact the OCR result successfully, you must pass one of the following `<api_version>` values to the redaction toolkit.
|
||||
|
||||
- v2.0
|
||||
- v2.1
|
||||
- v3.0
|
||||
|
||||
For example,
|
||||
|
||||
``` bash
|
||||
python redact.py ocr sample.ocr.json sample.labels.json redacted_sample.ocr.json "v3.0"
|
||||
```
|
||||
|
||||
### Redact FOTT Label Path
|
||||
|
@ -113,6 +127,7 @@ python redact.py fott <fott_label_path> <output_path>
|
|||
```
|
||||
|
||||
### Redact specific labels from Image, OCR results or FOTT Label Path
|
||||
|
||||
In some specific use-cases, the need may arise to redact specific labels from an image, OCR results or/and FOTT Label Path.
|
||||
Labels to be redacted need to be provided together in a single comma-separated string.
|
||||
|
||||
|
@ -127,17 +142,17 @@ And _Label_01_ and _Label_04_ need to be redacted, the following commands can be
|
|||
#### Redact specific labels from Image
|
||||
|
||||
``` bash
|
||||
python redact.py image <fott_label_path> <output_path> "Label_01,Label_04"
|
||||
python redact.py image <fott_label_path> <output_path> <api_version> "Label_01,Label_04"
|
||||
```
|
||||
#### Redact specific labels from OCR Result
|
||||
|
||||
``` bash
|
||||
python redact.py ocr <ocr_result_path> <image_path> <fott_label_path> <output_path> "Label_01,Label_04"
|
||||
python redact.py ocr <ocr_result_path> <image_path> <fott_label_path> <output_path> <api_version> "Label_01,Label_04"
|
||||
```
|
||||
#### Redact specific labels from FOTT Label Path
|
||||
|
||||
``` bash
|
||||
python redact.py image <image_path> <fott_label_path> <output_path> "Label_01,Label_04"
|
||||
python redact.py image <image_path> <fott_label_path> <output_path> <api_version> "Label_01,Label_04"
|
||||
```
|
||||
|
||||
### Batch Redaction
|
||||
|
@ -146,7 +161,7 @@ Batch redaction supports redacting a folder rather than executing on a single fi
|
|||
2. Azure Blob Storage virtual folder: a URL to a Blob Storage container and a folder path that denotes the folder.
|
||||
|
||||
``` bash
|
||||
python batch_redact.py <input_container> <input_folder_path> <output_container> <output_folder_path>
|
||||
python batch_redact.py <input_container> <input_folder_path> <output_container> <output_folder_path> <api_version>
|
||||
```
|
||||
|
||||
#### Container
|
||||
|
@ -176,12 +191,16 @@ python batch_redact.py local raw/ "https://my.blob.account/data?<my_secret_SAS_t
|
|||
python batch_redact.py "https://my.blob.account/data?<my_secret_SAS_token>" folder1/ "https://my.blob.account/data?<my_secret_SAS_token>" folder2/
|
||||
```
|
||||
|
||||
#### Note
|
||||
---
|
||||
|
||||
**NOTE**
|
||||
|
||||
1. Surround the URL with double quotes so that special characters in the SAS token are not misinterpreted by the shell.
|
||||
2. Visit [Create Your SAS tokens with Azure Storage Explorer](https://docs.microsoft.com/en-us/azure/cognitive-services/translator/document-translation/create-sas-tokens?tabs=Containers) to see how to create a SAS token for this program to use.
|
||||
3. Currently, this redact CLI only supports ASCII character redaction (Latin alphabets without the accent marks).
|
||||
|
||||
---
|
||||
|
||||
#### PDF Support
|
||||
|
||||
Batch mode now supports redacting data from one-page PDF documents. The tool detects any PDF document in the input folder, converts it to an image (.png), redacts the image, and places the result in the specified output folder upon completion.
|
||||
|
@ -204,7 +223,17 @@ pytest
|
|||
|
||||
in the root folder.
|
||||
|
||||
### Note
|
||||
---
|
||||
|
||||
**NOTE**
|
||||
|
||||
1. You can also take a look at the `redact/__init__.py` file. The command line interface (CLI) is just a thin wrapper around `redact_image()`, `redact_ocr_result()`, and `redact_fott_label()`. You can build on these three functions to achieve your own goals, such as redacting a batch of data.
|
||||
2. For batch redaction, we currently only support `.jpeg`, `.jpg`, `.png`, `.tif`, `.tiff`, and `.bmp` as the file extension for images. PDF files are not supported.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- [Form Recognizer API v2.0](https://westus2.dev.cognitive.microsoft.com/docs/services/form-recognizer-api-v2/operations/AnalyzeWithCustomForm)
|
||||
- [Form Recognizer API v2.1](https://westus.dev.cognitive.microsoft.com/docs/services/form-recognizer-api-v2-1/operations/AnalyzeWithCustomForm)
|
||||
- [Form Recognizer API v3.0](https://westus.dev.cognitive.microsoft.com/docs/services/form-recognizer-api-2022-08-31/operations/GetAnalyzeDocumentResult)
|
||||
|
|
|
@ -8,15 +8,14 @@ import shutil
|
|||
from typing import List
|
||||
from uuid import uuid4
|
||||
|
||||
from redact import redact_image, redact_fott_label, redact_ocr_result
|
||||
from redact import redact_fott_label, redact_ocr_result, redact_file_bundle
|
||||
from redact.io.blob_reader import BlobReader
|
||||
from redact.io.blob_writer import BlobWriter
|
||||
from redact.io.local_reader import LocalReader
|
||||
from redact.io.local_writer import LocalWriter
|
||||
from redact.utils.file_name import get_redacted_file_name, valid_url
|
||||
from redact.utils.pdf_renderer import PdfRenderer
|
||||
from redact.types.file_bundle import FileType, FileBundle
|
||||
from redact.types.pre_processing_bundle import PdfPreProcessingBundle
|
||||
from redact.preprocess import preprocess_multi_page_bundle
|
||||
|
||||
|
||||
# Strong assumption: all valid URLs are assumed to be Azure Blob URLs.
|
||||
|
@ -24,92 +23,82 @@ def is_blob_url(url: str) -> bool:
|
|||
return valid_url(url)
|
||||
|
||||
|
||||
def process_pdf_bundle(file_bundles: List[FileBundle], fields_to_redact: List[str]):
|
||||
renderer = PdfRenderer()
|
||||
|
||||
for file_bundle in file_bundles:
|
||||
pdf_pre_processing_bundle = PdfPreProcessingBundle.from_file_bundle(file_bundle)
|
||||
|
||||
redacted_image_name = get_redacted_file_name(pdf_pre_processing_bundle.rendered_file_name)
|
||||
redacted_fott_name = get_redacted_file_name(file_bundle.fott_file_name)
|
||||
redacted_ocr_name = get_redacted_file_name(file_bundle.ocr_file_name)
|
||||
|
||||
# Render PDF
|
||||
renderer.render_pdf_and_save(
|
||||
Path(build_pre_processing_folder, file_bundle.image_file_name),
|
||||
Path(build_pre_processing_folder, pdf_pre_processing_bundle.rendered_file_name),
|
||||
target_pdf_render_dpi)
|
||||
|
||||
# Follow the regular redaction process with taking files from slightly different source folders
|
||||
redact_image(
|
||||
Path(build_pre_processing_folder, pdf_pre_processing_bundle.rendered_file_name),
|
||||
Path(build_pre_processing_folder, file_bundle.fott_file_name),
|
||||
Path(build_output_folder, redacted_image_name),
|
||||
fields_to_redact)
|
||||
redact_fott_label(
|
||||
Path(build_pre_processing_folder, file_bundle.fott_file_name),
|
||||
Path(build_output_folder, redacted_fott_name),
|
||||
fields_to_redact)
|
||||
redact_ocr_result(
|
||||
Path(build_pre_processing_folder, file_bundle.ocr_file_name),
|
||||
Path(build_pre_processing_folder, file_bundle.fott_file_name),
|
||||
Path(build_output_folder, redacted_ocr_name),
|
||||
fields_to_redact)
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
input_container = sys.argv[1]
|
||||
input_path = sys.argv[2]
|
||||
output_container = sys.argv[3]
|
||||
output_path = sys.argv[4]
|
||||
api_version = sys.argv[5]
|
||||
target_pdf_render_dpi = 300
|
||||
fields_to_redact = []
|
||||
fields_to_redact = tuple()
|
||||
|
||||
if len(sys.argv) >= 6:
|
||||
fields_to_redact = (sys.argv[5].split(','))
|
||||
if len(sys.argv) >= 7:
|
||||
fields_to_redact = sys.argv[6].split(",")
|
||||
|
||||
# Randomly generated UUID in the build folder name to prevent collisions.
|
||||
build_path = Path(f'build-{uuid4()}/')
|
||||
build_pre_processing_folder = Path(build_path, "pre/")
|
||||
build_path = Path(f"build-{uuid4()}/")
|
||||
build_pre_folder = Path(build_path, "pre/")
|
||||
build_input_folder = Path(build_path, "in/")
|
||||
build_output_folder = Path(build_path, "out/")
|
||||
Path(build_pre_processing_folder).mkdir(parents=True, exist_ok=True)
|
||||
Path(build_pre_folder).mkdir(parents=True, exist_ok=True)
|
||||
Path(build_input_folder).mkdir(parents=True, exist_ok=True)
|
||||
Path(build_output_folder).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
file_bundle_list = None
|
||||
pdf_file_bundle_list = None
|
||||
multi_page_bundle_list = None
|
||||
if is_blob_url(input_container):
|
||||
reader = BlobReader(input_container, input_path)
|
||||
pdf_file_bundle_list = reader.download_bundles(to=build_pre_processing_folder, mode=FileType.PDF_ONLY)
|
||||
multi_page_bundle_list = reader.download_bundles(
|
||||
to=build_pre_folder, mode=FileType.MULTI_PAGE
|
||||
)
|
||||
file_bundle_list = reader.download_bundles(to=build_input_folder)
|
||||
else:
|
||||
reader = LocalReader(input_path)
|
||||
pdf_file_bundle_list = reader.copy_bundles(to=build_pre_processing_folder, mode=FileType.PDF_ONLY)
|
||||
multi_page_bundle_list = reader.copy_bundles(
|
||||
to=build_pre_folder, mode=FileType.MULTI_PAGE
|
||||
)
|
||||
file_bundle_list = reader.copy_bundles(to=build_input_folder)
|
||||
|
||||
per_page_bundle_list: List[FileBundle] = []
|
||||
|
||||
# Render and process PDF/TIFF files if any.
|
||||
if multi_page_bundle_list is not None:
|
||||
for fb in multi_page_bundle_list:
|
||||
bundle_list = preprocess_multi_page_bundle(
|
||||
fb, build_pre_folder, build_input_folder, target_pdf_render_dpi
|
||||
)
|
||||
per_page_bundle_list.extend(bundle_list)
|
||||
|
||||
# Short path: preprocess folder -> output folder.
|
||||
# We still need to redact the full label file.
|
||||
redact_fott_label(
|
||||
Path(build_pre_folder, fb.fott_file_name),
|
||||
Path(
|
||||
build_output_folder, get_redacted_file_name(fb.fott_file_name)
|
||||
),
|
||||
fields_to_redact,
|
||||
)
|
||||
|
||||
# We still need to redact the full ocr file.
|
||||
redact_ocr_result(
|
||||
Path(build_pre_folder, fb.ocr_file_name),
|
||||
Path(build_pre_folder, fb.fott_file_name),
|
||||
Path(build_output_folder, get_redacted_file_name(fb.ocr_file_name)),
|
||||
api_version,
|
||||
fields_to_redact,
|
||||
)
|
||||
|
||||
# Process images and per page result from multi-page documents.
|
||||
file_bundle_list.extend(per_page_bundle_list)
|
||||
for fb in file_bundle_list:
|
||||
redacted_image_name = get_redacted_file_name(fb.image_file_name)
|
||||
redacted_fott_name = get_redacted_file_name(fb.fott_file_name)
|
||||
redacted_ocr_name = get_redacted_file_name(fb.ocr_file_name)
|
||||
|
||||
redact_image(
|
||||
Path(build_input_folder, fb.image_file_name),
|
||||
Path(build_input_folder, fb.fott_file_name),
|
||||
Path(build_output_folder, redacted_image_name),
|
||||
fields_to_redact)
|
||||
redact_fott_label(
|
||||
Path(build_input_folder, fb.fott_file_name),
|
||||
Path(build_output_folder, redacted_fott_name),
|
||||
fields_to_redact)
|
||||
redact_ocr_result(
|
||||
Path(build_input_folder, fb.ocr_file_name),
|
||||
Path(build_input_folder, fb.fott_file_name),
|
||||
Path(build_output_folder, redacted_ocr_name),
|
||||
fields_to_redact)
|
||||
|
||||
# Render and process PDF files if any
|
||||
if pdf_file_bundle_list is not None:
|
||||
process_pdf_bundle(pdf_file_bundle_list, fields_to_redact)
|
||||
redact_file_bundle(
|
||||
fb,
|
||||
build_input_folder,
|
||||
build_output_folder,
|
||||
api_version,
|
||||
fields_to_redact,
|
||||
)
|
||||
|
||||
if is_blob_url(output_container):
|
||||
writer = BlobWriter(output_container, output_path)
|
||||
|
|
|
@ -6,30 +6,35 @@ import sys
|
|||
from redact import redact_image, redact_fott_label, redact_ocr_result
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
operator = sys.argv[1]
|
||||
|
||||
if operator == 'image':
|
||||
labels_to_redact = [] if len(sys.argv) < 6 else sys.argv[5].split(',')
|
||||
if operator == "image":
|
||||
labels_to_redact = [] if len(sys.argv) < 6 else sys.argv[5].split(",")
|
||||
redact_image(
|
||||
image_path=sys.argv[2],
|
||||
fott_label_path=sys.argv[3],
|
||||
output_path=sys.argv[4],
|
||||
labels_to_redact=labels_to_redact)
|
||||
labels_to_redact=labels_to_redact,
|
||||
)
|
||||
|
||||
elif operator == 'fott':
|
||||
labels_to_redact = [] if len(sys.argv) < 5 else sys.argv[4].split(',')
|
||||
redact_fott_label(fott_label_path=sys.argv[2],
|
||||
output_path=sys.argv[3],
|
||||
labels_to_redact=labels_to_redact)
|
||||
elif operator == "fott":
|
||||
labels_to_redact = [] if len(sys.argv) < 5 else sys.argv[4].split(",")
|
||||
redact_fott_label(
|
||||
fott_label_path=sys.argv[2],
|
||||
output_path=sys.argv[3],
|
||||
labels_to_redact=labels_to_redact,
|
||||
)
|
||||
|
||||
elif operator == 'ocr':
|
||||
labels_to_redact = [] if len(sys.argv) < 6 else sys.argv[5].split(',')
|
||||
elif operator == "ocr":
|
||||
labels_to_redact = [] if len(sys.argv) < 7 else sys.argv[6].split(",")
|
||||
redact_ocr_result(
|
||||
ocr_result_path=sys.argv[2],
|
||||
fott_label_path=sys.argv[3],
|
||||
output_path=sys.argv[4],
|
||||
labels_to_redact=labels_to_redact)
|
||||
api_version=sys.argv[5],
|
||||
labels_to_redact=labels_to_redact,
|
||||
)
|
||||
|
||||
else:
|
||||
raise NameError()
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
from pathlib import Path
|
||||
import json
|
||||
from typing import List
|
||||
from typing import List, Collection
|
||||
|
||||
from PIL import Image, ImageOps
|
||||
from dacite import from_dict
|
||||
|
@ -12,12 +12,21 @@ from dacite import from_dict
|
|||
from redact.redaction.image_redaction import ImageRedaction
|
||||
from redact.redaction.ocr_result_redaction import OcrResultRedaction
|
||||
from redact.redaction.fott_label_redaction import FottLabelRedaction
|
||||
from redact.types.api_version import ApiVersion
|
||||
from redact.types.fott_label import FottLabel
|
||||
from redact.types.file_bundle import FileBundle
|
||||
from redact.utils.file_name import get_redacted_file_name
|
||||
|
||||
|
||||
def redact_image(image_path: str, fott_label_path: str, output_path: str, labels_to_redact: List[str] = []):
|
||||
with Image.open(image_path) as image, \
|
||||
open(fott_label_path, encoding='utf-8-sig') as fott_label_json:
|
||||
def redact_image(
|
||||
image_path: str,
|
||||
fott_label_path: str,
|
||||
output_path: str,
|
||||
labels_to_redact: Collection[str] = tuple(),
|
||||
):
|
||||
with Image.open(image_path) as image, open(
|
||||
fott_label_path, encoding="utf-8-sig"
|
||||
) as fott_label_json:
|
||||
|
||||
# Transpose the image based on EXIF orientation tag.
|
||||
image = ImageOps.exif_transpose(image)
|
||||
|
@ -26,17 +35,22 @@ def redact_image(image_path: str, fott_label_path: str, output_path: str, labels
|
|||
fott_label = from_dict(data_class=FottLabel, data=fott_label_dict)
|
||||
|
||||
# page_size = {page: (width, height)}
|
||||
annots = fott_label.to_annotations(
|
||||
page_size={1: (image.width, image.height)})
|
||||
annots = fott_label.to_annotations(page_size={1: (image.width, image.height)})
|
||||
|
||||
redaction = ImageRedaction(image=image, annotations=annots, labels_to_redact=labels_to_redact)
|
||||
redaction = ImageRedaction(
|
||||
image=image, annotations=annots, labels_to_redact=labels_to_redact
|
||||
)
|
||||
redaction.redact()
|
||||
|
||||
redaction.image.save(output_path)
|
||||
|
||||
|
||||
def redact_fott_label(fott_label_path: str, output_path: str, labels_to_redact: List[str] = []):
|
||||
with open(fott_label_path, encoding='utf-8-sig') as fott_label_json:
|
||||
def redact_fott_label(
|
||||
fott_label_path: str,
|
||||
output_path: str,
|
||||
labels_to_redact: Collection[str] = tuple(),
|
||||
):
|
||||
with open(fott_label_path, encoding="utf-8-sig") as fott_label_json:
|
||||
fott_label_dict = json.load(fott_label_json)
|
||||
fott_label = from_dict(data_class=FottLabel, data=fott_label_dict)
|
||||
|
||||
|
@ -48,20 +62,24 @@ def redact_fott_label(fott_label_path: str, output_path: str, labels_to_redact:
|
|||
def dumper(obj):
|
||||
try:
|
||||
return obj.toJSON()
|
||||
except:
|
||||
except AttributeError:
|
||||
return obj.__dict__
|
||||
|
||||
Path(output_path).write_text(
|
||||
json.dumps(redaction.fott_label, default=dumper), encoding='utf-8')
|
||||
json.dumps(redaction.fott_label, default=dumper), encoding="utf-8"
|
||||
)
|
||||
|
||||
|
||||
def redact_ocr_result(
|
||||
ocr_result_path: str,
|
||||
fott_label_path: str,
|
||||
output_path: str,
|
||||
labels_to_redact: List[str] = []):
|
||||
with open(ocr_result_path, encoding='utf-8-sig') as ocr_result_json, \
|
||||
open(fott_label_path, encoding='utf-8-sig') as fott_label_json:
|
||||
ocr_result_path: str,
|
||||
fott_label_path: str,
|
||||
output_path: str,
|
||||
api_version: ApiVersion,
|
||||
labels_to_redact: Collection[str] = tuple(),
|
||||
):
|
||||
with open(ocr_result_path, encoding="utf-8-sig") as ocr_result_json, open(
|
||||
fott_label_path, encoding="utf-8-sig"
|
||||
) as fott_label_json:
|
||||
fott_label_dict = json.load(fott_label_json)
|
||||
fott_label = from_dict(data_class=FottLabel, data=fott_label_dict)
|
||||
|
||||
|
@ -69,14 +87,63 @@ def redact_ocr_result(
|
|||
|
||||
# page_size = {page: (width, height)}
|
||||
page_size = {}
|
||||
for readResult in ocr_result["analyzeResult"]["readResults"]:
|
||||
page_size[readResult["page"]] = (
|
||||
readResult["width"], readResult["height"])
|
||||
if ApiVersion(api_version) in [
|
||||
ApiVersion.V2_0,
|
||||
ApiVersion.V2_1,
|
||||
]:
|
||||
for read_result in ocr_result["analyzeResult"]["readResults"]:
|
||||
page_size[read_result["page"]] = (
|
||||
read_result["width"],
|
||||
read_result["height"],
|
||||
)
|
||||
elif ApiVersion(api_version) in [
|
||||
ApiVersion.V3_0,
|
||||
]:
|
||||
pages = ocr_result["analyzeResult"]["pages"]
|
||||
for page in pages:
|
||||
page_number = page["pageNumber"]
|
||||
page_size[page_number] = (page["width"], page["height"])
|
||||
|
||||
annots = fott_label.to_annotations(page_size=page_size)
|
||||
|
||||
redaction = OcrResultRedaction(ocr_result, annots, labels_to_redact)
|
||||
redaction = OcrResultRedaction(
|
||||
ocr_result,
|
||||
annots,
|
||||
api_version,
|
||||
labels_to_redact,
|
||||
)
|
||||
|
||||
redaction.redact()
|
||||
|
||||
Path(output_path).write_text(
|
||||
json.dumps(redaction.ocr_result), encoding='utf-8')
|
||||
Path(output_path).write_text(json.dumps(redaction.ocr_result), encoding="utf-8")
|
||||
|
||||
|
||||
def redact_file_bundle(
    fb: FileBundle,
    in_folder: str,
    out_folder: str,
    api_version: ApiVersion,
    labels_to_redact: Collection[str] = tuple(),
):
    """
    Redact the image, the FOTT label and the OCR result of one file bundle.

    Every input is read from ``in_folder``; the redacted counterpart is
    written to ``out_folder`` under the name returned by
    ``get_redacted_file_name``.

    :param fb: bundle naming the image, FOTT label and OCR result files.
    :param in_folder: folder holding the three input files.
    :param out_folder: folder receiving the three redacted files.
    :param api_version: OCR result schema version, forwarded to ``redact_ocr_result``.
    :param labels_to_redact: labels forwarded to each redaction step.
    """
    # Resolve all output names before any redaction starts.
    image_out_name = get_redacted_file_name(fb.image_file_name)
    fott_out_name = get_redacted_file_name(fb.fott_file_name)
    ocr_out_name = get_redacted_file_name(fb.ocr_file_name)

    source_dir = Path(in_folder)
    target_dir = Path(out_folder)
    # The raw FOTT label drives all three redaction steps.
    fott_source = source_dir / fb.fott_file_name

    redact_image(
        source_dir / fb.image_file_name,
        fott_source,
        target_dir / image_out_name,
        labels_to_redact=labels_to_redact,
    )
    redact_fott_label(
        fott_source,
        target_dir / fott_out_name,
        labels_to_redact,
    )
    redact_ocr_result(
        source_dir / fb.ocr_file_name,
        fott_source,
        target_dir / ocr_out_name,
        api_version,
        labels_to_redact,
    )
|
||||
|
|
|
@ -11,13 +11,14 @@ from redact.types.file_bundle import FileBundle
|
|||
from redact.types.file_bundle import FileType
|
||||
|
||||
|
||||
class BlobReader():
|
||||
class BlobReader:
|
||||
def __init__(self, container_url: str, prefix: str):
|
||||
self.container_client = ContainerClient.from_container_url(
|
||||
container_url)
|
||||
self.container_client = ContainerClient.from_container_url(container_url)
|
||||
self.prefix = prefix
|
||||
|
||||
def download_bundles(self, to: str, mode=FileType.IMAGE_ONLY) -> List[FileBundle]:
|
||||
def download_bundles(
|
||||
self, to: str, mode=FileType.SINGLE_PAGE_IMAGE
|
||||
) -> List[FileBundle]:
|
||||
blobs = self.container_client.list_blobs(name_starts_with=self.prefix)
|
||||
all_file_name_list = [Path(blob.name).name for blob in blobs]
|
||||
file_bundles = FileBundle.from_names(all_file_name_list, mode)
|
||||
|
@ -31,18 +32,18 @@ class BlobReader():
|
|||
fott_path = Path(to, bundle.fott_file_name)
|
||||
ocr_path = Path(to, bundle.ocr_file_name)
|
||||
|
||||
with open(image_path, 'wb') as image_file, \
|
||||
open(fott_path, 'wb') as fott_file, \
|
||||
open(ocr_path, 'wb') as ocr_file:
|
||||
with open(image_path, "wb") as image_file, open(
|
||||
fott_path, "wb"
|
||||
) as fott_file, open(ocr_path, "wb") as ocr_file:
|
||||
|
||||
image_file.write(
|
||||
self.container_client.
|
||||
download_blob(image_blob_path).readall())
|
||||
self.container_client.download_blob(image_blob_path).readall()
|
||||
)
|
||||
fott_file.write(
|
||||
self.container_client.
|
||||
download_blob(fott_blob_path).readall())
|
||||
self.container_client.download_blob(fott_blob_path).readall()
|
||||
)
|
||||
ocr_file.write(
|
||||
self.container_client.
|
||||
download_blob(ocr_blob_path).readall())
|
||||
self.container_client.download_blob(ocr_blob_path).readall()
|
||||
)
|
||||
|
||||
return file_bundles
|
||||
|
|
|
@ -7,16 +7,14 @@ from pathlib import Path
|
|||
from azure.storage.blob import ContainerClient
|
||||
|
||||
|
||||
class BlobWriter():
|
||||
class BlobWriter:
|
||||
def __init__(self, container_url: str, prefix: str):
|
||||
self.container_client = ContainerClient.from_container_url(
|
||||
container_url)
|
||||
self.container_client = ContainerClient.from_container_url(container_url)
|
||||
self.prefix = prefix
|
||||
|
||||
def upload_files(self, folder: str):
|
||||
for child in Path(folder).iterdir():
|
||||
with open(child, "rb") as data:
|
||||
self.container_client.upload_blob(
|
||||
name=self.prefix + child.name,
|
||||
data=data,
|
||||
overwrite=True)
|
||||
name=self.prefix + child.name, data=data, overwrite=True
|
||||
)
|
||||
|
|
|
@ -10,12 +10,14 @@ from redact.types.file_bundle import FileBundle
|
|||
from redact.types.file_bundle import FileType
|
||||
|
||||
|
||||
class LocalReader():
|
||||
class LocalReader:
|
||||
def __init__(self, input_path: str):
|
||||
self.input_path = Path(input_path)
|
||||
|
||||
def copy_bundles(self, to: str, mode=FileType.IMAGE_ONLY) -> List[FileBundle]:
|
||||
file_names = [path.name for path in self.input_path.glob('**/*')]
|
||||
def copy_bundles(
|
||||
self, to: str, mode=FileType.SINGLE_PAGE_IMAGE
|
||||
) -> List[FileBundle]:
|
||||
file_names = [path.name for path in self.input_path.glob("**/*")]
|
||||
file_bundles = FileBundle.from_names(file_names, mode)
|
||||
|
||||
for bundle in file_bundles:
|
||||
|
|
|
@ -6,7 +6,7 @@ from pathlib import Path
|
|||
import shutil
|
||||
|
||||
|
||||
class LocalWriter():
|
||||
class LocalWriter:
|
||||
def __init__(self, output_path: str):
|
||||
self.output_path = Path(output_path)
|
||||
Path(self.output_path).mkdir(parents=True, exist_ok=True)
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from redact.types.file_bundle import FileBundle
|
||||
from redact.utils.file_name import get_page_file_name, is_pdf, is_tiff
|
||||
from redact.preprocess.pdf_renderer import PdfRenderer
|
||||
from redact.preprocess.tiff_renderer import TiffRenderer
|
||||
from redact.preprocess.multi_page import extract_page_label, extract_page_ocr
|
||||
|
||||
|
||||
def preprocess_multi_page_bundle(
    fb: FileBundle,
    pre_folder: str,
    in_folder: str,
    target_pdf_render_dpi: int = 300,
) -> List[FileBundle]:
    """
    Split a multi-page PDF/TIFF bundle into one single-page bundle per page.

    For every page, the page image is rendered to PNG and the matching FOTT
    label and OCR result are extracted; all three files are written to
    ``in_folder``.

    :param fb: bundle naming the multi-page image, FOTT label and OCR files.
    :param pre_folder: folder holding the original multi-page files.
    :param in_folder: folder receiving the per-page files.
    :param target_pdf_render_dpi: DPI used when rendering PDF pages.
    :returns: one ``FileBundle`` per page, in page order.
    :raises ValueError: if the image file is neither a PDF nor a TIFF.
    """
    source_image = Path(pre_folder, fb.image_file_name)

    # Pick the renderer once: the file type cannot change between pages.
    if is_pdf(fb.image_file_name):
        renderer = PdfRenderer()

        def render_page(output_file: Path, page_number: int) -> None:
            renderer.render_pdf_and_save(
                source_image,
                output_file,
                target_pdf_render_dpi,
                page_number=page_number,
            )

    elif is_tiff(fb.image_file_name):
        renderer = TiffRenderer()

        def render_page(output_file: Path, page_number: int) -> None:
            renderer.render_tiff_and_save(
                source_image,
                output_file,
                page_number=page_number,
            )

    else:
        raise ValueError("File should be PDF or TIFF.")

    ret = []
    page_count = renderer.get_page_count(source_image)
    # Page numbers are 1-based throughout.
    for page in range(1, page_count + 1):
        # Render raw image per page.
        page_image_name = get_page_file_name(
            fb.image_file_name,
            page,
            ".rendered.png",
        )
        render_page(Path(in_folder, page_image_name), page)

        # Extract raw FOTT file per page.
        page_fott_file_name = get_page_file_name(
            fb.image_file_name,
            page,
            ".rendered.png.labels.json",
        )
        extract_page_label(
            Path(pre_folder, fb.fott_file_name),
            Path(in_folder, page_fott_file_name),
            page,
        )

        # Extract raw OCR file per page.
        page_ocr_file_name = get_page_file_name(
            fb.image_file_name,
            page,
            ".rendered.png.ocr.json",
        )
        extract_page_ocr(
            Path(pre_folder, fb.ocr_file_name),
            Path(in_folder, page_ocr_file_name),
            page,
        )

        ret.append(
            FileBundle(
                image_file_name=page_image_name,
                fott_file_name=page_fott_file_name,
                ocr_file_name=page_ocr_file_name,
            )
        )
    return ret
|
|
@ -0,0 +1,48 @@
|
|||
from pathlib import Path
|
||||
import json
|
||||
|
||||
from dacite import from_dict
|
||||
|
||||
from redact.types.fott_label import FottLabel
|
||||
|
||||
|
||||
def extract_page_label(fott_label_path: str, output_path: str, page_number: int):
    """
    Write a single-page FOTT label file extracted from a multi-page one.

    Only the entities located on ``page_number`` are kept, renumbered to
    page 1. Labels without any entity on that page are dropped.

    :param fott_label_path: path of the multi-page FOTT label JSON file.
    :param output_path: path of the single-page FOTT label JSON file to write.
    :param page_number: page whose entities are extracted.
    """
    with open(fott_label_path, encoding="utf-8-sig") as fott_label_json:
        fott_label_dict = json.load(fott_label_json)
    fott_label = from_dict(data_class=FottLabel, data=fott_label_dict)

    selected_labels = []
    for label in fott_label.labels:
        selected_entities = [
            entity for entity in label.value if entity.page == page_number
        ]
        if not selected_entities:
            continue
        # The extracted file describes exactly one page.
        for entity in selected_entities:
            entity.page = 1
        # Store the filtered list; otherwise entities from other pages
        # would be written out with the label.
        label.value = selected_entities
        selected_labels.append(label)

    fott_label.labels = selected_labels

    # Custom dumper because default JSON serializer
    # does not support FottLabel.
    def dumper(obj):
        try:
            return obj.toJSON()
        except AttributeError:
            return obj.__dict__

    Path(output_path).write_text(
        json.dumps(fott_label, default=dumper), encoding="utf-8"
    )
|
||||
|
||||
|
||||
def extract_page_ocr(ocr_result_path: str, output_path: str, page_number: int):
    """
    Write a single-page OCR result extracted from a multi-page one.

    The ``readResults`` entry at position ``page_number`` is kept as the only
    entry and its ``page`` field is reset to 1. Every other part of the OCR
    result is written out unchanged.

    :param ocr_result_path: path of the multi-page OCR result JSON file.
    :param output_path: path of the single-page OCR result JSON file to write.
    :param page_number: a **1-based** index into ``readResults``.
    :raises IndexError: if ``page_number`` is not within ``1..len(readResults)``.
    """
    with open(ocr_result_path, encoding="utf-8-sig") as ocr_result_json:
        ocr_result = json.load(ocr_result_json)

    read_results = ocr_result["analyzeResult"]["readResults"]
    # Reject zero/negative values explicitly: list indexing would otherwise
    # wrap around and silently select a page from the end.
    if not 1 <= page_number <= len(read_results):
        raise IndexError(
            f"page_number {page_number} is out of range; "
            f"the OCR result has {len(read_results)} page(s)."
        )

    page_result = read_results[page_number - 1]
    page_result["page"] = 1
    ocr_result["analyzeResult"]["readResults"] = [page_result]

    Path(output_path).write_text(json.dumps(ocr_result), encoding="utf-8")
|
|
@ -0,0 +1,90 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from PIL import Image
|
||||
|
||||
import ctypes
|
||||
import pypdfium as pdfium
|
||||
|
||||
WHITE = 0xFFFFFFFF
|
||||
|
||||
|
||||
class PdfRenderer:
    """Render PDF pages into PIL images through the PDFium bindings."""

    def __init__(self):
        # Initiate PDFium - This only needs to happen once
        pdfium.FPDF_InitLibraryWithConfig(pdfium.FPDF_LIBRARY_CONFIG(2, None, None, 0))

    def get_page_count(self, input_file: str):
        """
        Return the page count PDFium reports for a PDF.

        :param input_file: a path points to the PDF.
        :returns: the value returned by ``FPDF_GetPageCount``.
        """
        doc = pdfium.FPDF_LoadDocument(str(input_file), None)
        try:
            return pdfium.FPDF_GetPageCount(doc)
        finally:
            # Close the document even when the query fails.
            pdfium.FPDF_CloseDocument(doc)

    def render_pdf(
        self, input_file: str, render_target_dpi: int, page_number: int = 1
    ) -> Image:
        """
        This renders a PDF page into an Image.

        :param input_file: a path points to the PDF.
        :param render_target_dpi: the target DPI for rendering the image.
        :param page_number: a **1-based** page index for the to-be-rendered page.
        :returns: an Image of the PDF page with the target DPI.
        """
        # Each PDFium handle is released in a ``finally`` so that an error
        # halfway through rendering does not leak the document, page or bitmap.
        doc = pdfium.FPDF_LoadDocument(str(input_file), None)
        try:
            # PDFium page indexes are 0-based.
            page = pdfium.FPDF_LoadPage(doc, page_number - 1)
            try:
                # Page dimensions are measured in points.
                # One point is 1/72 inch (around 0.3528 mm).
                width = int(pdfium.FPDF_GetPageWidthF(page) + 0.5)
                height = int(pdfium.FPDF_GetPageHeightF(page) + 0.5)

                # Convert the page size from points to pixels at the target DPI.
                render_width = int(width / 72 * render_target_dpi)
                render_height = int(height / 72 * render_target_dpi)

                # render to bitmap
                bitmap = pdfium.FPDFBitmap_Create(render_width, render_height, 0)
                try:
                    # Fill the bitmap with white before rendering the page.
                    pdfium.FPDFBitmap_FillRect(
                        bitmap, 0, 0, render_width, render_height, WHITE
                    )
                    pdfium.FPDF_RenderPageBitmap(
                        bitmap,
                        page,
                        0,
                        0,
                        render_width,
                        render_height,
                        0,
                        pdfium.FPDF_LCD_TEXT | pdfium.FPDF_ANNOT,
                    )

                    # retrieve data from bitmap (4 bytes per pixel)
                    buffer = pdfium.FPDFBitmap_GetBuffer(bitmap)
                    buffer_ = ctypes.cast(
                        buffer,
                        ctypes.POINTER(
                            ctypes.c_ubyte * (render_width * render_height * 4)
                        ),
                    )

                    return Image.frombuffer(
                        "RGBA",
                        (render_width, render_height),
                        buffer_.contents,
                        "raw",
                        "BGRA",
                        0,
                        1,
                    )
                finally:
                    pdfium.FPDFBitmap_Destroy(bitmap)
            finally:
                pdfium.FPDF_ClosePage(page)
        finally:
            pdfium.FPDF_CloseDocument(doc)

    def render_pdf_and_save(
        self,
        input_file: str,
        output_file: str,
        render_target_dpi: int,
        page_number: int = 1,
    ):
        """
        Render a PDF page and save it as an image file.

        :param input_file: a path points to the PDF.
        :param output_file: the path the rendered image is saved to.
        :param render_target_dpi: the target DPI for rendering the image.
        :param page_number: a **1-based** page index for the to-be-rendered page.
        """
        img = self.render_pdf(input_file, render_target_dpi, page_number)
        try:
            img.save(output_file)
        finally:
            # Release the image even when saving fails.
            img.close()
|
|
@ -0,0 +1,23 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class TiffRenderer:
    """Read multi-page TIFF files with Pillow and export single pages."""

    def get_page_count(self, input_file: str):
        """
        Return the number of frames in a TIFF file.

        :param input_file: a path points to the TIFF.
        :returns: the ``n_frames`` value reported by Pillow.
        """
        # The context manager closes the file; previously it was left open.
        with Image.open(input_file) as tiffstack:
            tiffstack.load()
            return tiffstack.n_frames

    def render_tiff_and_save(
        self, input_file: str, output_file: str, page_number: int = 1
    ):
        """
        Save one page of a TIFF file as a separate image file.

        :param input_file: a path points to the TIFF.
        :param output_file: the path the selected page is saved to.
        :param page_number: a **1-based** page index for the to-be-saved page.
        """
        # The context manager closes the file even if seek/save raises.
        with Image.open(input_file) as tiffstack:
            tiffstack.load()

            # Pillow frame indexes are 0-based.
            tiffstack.seek(page_number - 1)

            tiffstack.save(output_file)
|
|
@ -1,14 +1,18 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
from typing import List
|
||||
from typing import Collection
|
||||
|
||||
from redact.types.fott_label import FottLabel
|
||||
from redact.utils.redact_policy import first_char
|
||||
|
||||
|
||||
class FottLabelRedaction:
|
||||
def __init__(self, fott_label: FottLabel, labels_to_redact: List[str] = []):
|
||||
def __init__(
|
||||
self,
|
||||
fott_label: FottLabel,
|
||||
labels_to_redact: Collection[str] = tuple(),
|
||||
):
|
||||
self.fott_label = fott_label
|
||||
self.labels_to_redact = labels_to_redact
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from typing import List
|
||||
from typing import List, Collection
|
||||
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
|
@ -16,7 +16,12 @@ class ImageRedaction:
|
|||
COLOR = "#FFFFFF"
|
||||
COLOR_WITH_ALPHA = "#FFFFFFFF"
|
||||
|
||||
def __init__(self, image: Image, annotations: List[Annotation], labels_to_redact: List[str] = []):
|
||||
def __init__(
|
||||
self,
|
||||
image: Image,
|
||||
annotations: List[Annotation],
|
||||
labels_to_redact: Collection[str] = tuple(),
|
||||
):
|
||||
self.image = image
|
||||
self.anntations = annotations
|
||||
self.labels_to_redact = labels_to_redact
|
||||
|
@ -24,14 +29,20 @@ class ImageRedaction:
|
|||
def redact(self):
|
||||
draw = ImageDraw.Draw(self.image)
|
||||
for annotation in self.anntations:
|
||||
if len(self.labels_to_redact) == 0 or annotation.field in self.labels_to_redact:
|
||||
if (
|
||||
len(self.labels_to_redact) == 0
|
||||
or annotation.field in self.labels_to_redact
|
||||
):
|
||||
if self.with_alpha_channel(self.image.mode):
|
||||
draw.polygon(annotation.bounding_box,
|
||||
fill=self.COLOR_WITH_ALPHA,
|
||||
outline=self.COLOR_WITH_ALPHA)
|
||||
draw.polygon(
|
||||
annotation.bounding_box,
|
||||
fill=self.COLOR_WITH_ALPHA,
|
||||
outline=self.COLOR_WITH_ALPHA,
|
||||
)
|
||||
else:
|
||||
draw.polygon(annotation.bounding_box,
|
||||
fill=self.COLOR, outline=self.COLOR)
|
||||
draw.polygon(
|
||||
annotation.bounding_box, fill=self.COLOR, outline=self.COLOR
|
||||
)
|
||||
|
||||
def with_alpha_channel(self, mode):
|
||||
"""See https://github.com/python-pillow/Pillow/blob/affa059e959280bf7826ec1a023a64cb8f111b6d/Tests/test_image_access.py#L185
|
||||
|
@ -54,11 +65,7 @@ class ImageRedaction:
|
|||
"YCbCr",
|
||||
):
|
||||
return False
|
||||
elif mode in (
|
||||
"LA",
|
||||
"PA",
|
||||
"RGBA"
|
||||
):
|
||||
elif mode in ("LA", "PA", "RGBA"):
|
||||
return True
|
||||
else:
|
||||
raise Exception(f"Image mode \"{mode}\" is not supported.")
|
||||
raise Exception(f'Image mode "{mode}" is not supported.')
|
||||
|
|
|
@ -2,96 +2,44 @@
|
|||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from typing import List, Set
|
||||
|
||||
from jsonpointer import resolve_pointer, set_pointer
|
||||
from typing import List, Collection
|
||||
|
||||
from redact.redaction.ocr_result_redaction_v2 import OcrResultRedactionV2
|
||||
from redact.redaction.ocr_result_redaction_v3 import OcrResultRedactionV3
|
||||
from redact.types.annotation import Annotation
|
||||
from redact.utils.bounding_box_mapping import similar
|
||||
from redact.utils.redact_policy import first_char
|
||||
from redact.types.api_version import ApiVersion
|
||||
|
||||
|
||||
class OcrResultRedaction:
|
||||
LINE_OVERLAP_THRESHOLD = 0.1
|
||||
WORD_OVERLAP_THRESHOLD = 0.98
|
||||
|
||||
def __init__(self, ocr_result: dict, annotations: List[Annotation], labels_to_redact: List[str] = []):
|
||||
def __init__(
|
||||
self,
|
||||
ocr_result: dict,
|
||||
annotations: List[Annotation],
|
||||
api_version: ApiVersion = ApiVersion.V3_0,
|
||||
labels_to_redact: Collection[str] = tuple(),
|
||||
):
|
||||
self.ocr_result = ocr_result
|
||||
self.annotations = annotations
|
||||
self.labels_to_redact = labels_to_redact
|
||||
self.api_version = api_version
|
||||
|
||||
def redact(self):
|
||||
refs = []
|
||||
for annot in self.annotations:
|
||||
if len(self.labels_to_redact) == 0 or annot.field in self.labels_to_redact:
|
||||
refs.extend(self.find_mapped_refs(annot))
|
||||
self.redact_words(refs)
|
||||
self.redact_lines(refs)
|
||||
# Set is faster than List in this case.
|
||||
self.redact_page_results(set(refs))
|
||||
|
||||
def find_mapped_refs(self, annot: Annotation):
|
||||
refs = []
|
||||
read_results = self.ocr_result["analyzeResult"]["readResults"]
|
||||
for read_id, read_result in enumerate(read_results):
|
||||
lines: List[dict] = read_result["lines"]
|
||||
for line_id, line in enumerate(lines):
|
||||
# Early rejection.
|
||||
if not similar(annot.bounding_box, line["boundingBox"], self.LINE_OVERLAP_THRESHOLD):
|
||||
continue
|
||||
|
||||
words: List[dict] = line["words"]
|
||||
for word_id, word in enumerate(words):
|
||||
if similar(annot.bounding_box, word["boundingBox"], self.WORD_OVERLAP_THRESHOLD):
|
||||
refs.append(self.build_ref(read_id, line_id, word_id))
|
||||
return refs
|
||||
|
||||
def redact_words(self, refs: List[str]):
|
||||
def word_path(ref: str) -> str:
|
||||
# Remove leading '#'.
|
||||
return ref[1:]
|
||||
|
||||
for ref in refs:
|
||||
r = word_path(ref)
|
||||
word = resolve_pointer(self.ocr_result, r)
|
||||
word["text"] = first_char(word["text"])
|
||||
set_pointer(self.ocr_result, r, word)
|
||||
|
||||
def redact_lines(self, refs: List[str]):
|
||||
def line_path(ref: str) -> str:
|
||||
end = ref.find("/word")
|
||||
# Remove leading '#' and trailing word path.
|
||||
return ref[1:end]
|
||||
|
||||
for ref in refs:
|
||||
r = line_path(ref)
|
||||
line = resolve_pointer(self.ocr_result, r)
|
||||
|
||||
tokens = line["text"].split(' ')
|
||||
word_id = int(ref.split('/')[-1])
|
||||
tokens[word_id] = first_char(tokens[word_id])
|
||||
line["text"] = ' '.join(tokens)
|
||||
|
||||
set_pointer(self.ocr_result, r, line)
|
||||
|
||||
def redact_page_results(self, refs: Set[str]):
|
||||
def add_analyze_layer(elem: str) -> str:
|
||||
return elem.replace('#/', '#/analyzeResult/')
|
||||
|
||||
page_results = self.ocr_result["analyzeResult"]["pageResults"]
|
||||
for page_result in page_results:
|
||||
tables: List[dict] = page_result["tables"]
|
||||
for table in tables:
|
||||
cells: List[dict] = table["cells"]
|
||||
for cell in cells:
|
||||
elements: List[str] = cell["elements"]
|
||||
for elem_id, element in enumerate(elements):
|
||||
full_elem = add_analyze_layer(element)
|
||||
if full_elem in refs:
|
||||
tokens = cell["text"].split(' ')
|
||||
tokens[elem_id] = first_char(tokens[elem_id])
|
||||
cell["text"] = ' '.join(tokens)
|
||||
|
||||
@ staticmethod
|
||||
def build_ref(read_id: int, line_id: int, word_id: int) -> str:
|
||||
return f'#/analyzeResult/readResults/{read_id}/lines/{line_id}/words/{word_id}'
|
||||
if ApiVersion(self.api_version) in [
|
||||
ApiVersion.V2_0,
|
||||
ApiVersion.V2_1,
|
||||
]:
|
||||
redaction = OcrResultRedactionV2(
|
||||
self.ocr_result,
|
||||
self.annotations,
|
||||
self.labels_to_redact,
|
||||
)
|
||||
redaction.redact()
|
||||
elif ApiVersion(self.api_version) in [
|
||||
ApiVersion.V3_0,
|
||||
]:
|
||||
redaction = OcrResultRedactionV3(
|
||||
self.ocr_result,
|
||||
self.annotations,
|
||||
self.labels_to_redact,
|
||||
)
|
||||
redaction.redact()
|
||||
|
|
|
@ -0,0 +1,110 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from typing import List, Set, Collection
|
||||
|
||||
from jsonpointer import resolve_pointer, set_pointer
|
||||
|
||||
from redact.types.annotation import Annotation
|
||||
from redact.utils.bounding_box_mapping import similar
|
||||
from redact.utils.redact_policy import first_char
|
||||
|
||||
|
||||
class OcrResultRedactionV2:
    """
    Redact annotated words inside a v2.0/v2.1 OCR result.

    The OCR result dictionary is modified in place: matched words, the lines
    containing them and the table cells referencing them get their text
    replaced by ``first_char`` of the original text.
    """

    # Threshold passed to ``similar`` for the cheap line-level rejection.
    LINE_OVERLAP_THRESHOLD = 0.1
    # Threshold passed to ``similar`` when matching a word to an annotation.
    WORD_OVERLAP_THRESHOLD = 0.98

    def __init__(
        self,
        ocr_result: dict,
        annotations: List[Annotation],
        labels_to_redact: Collection[str] = tuple(),
    ):
        """
        :param ocr_result: the parsed OCR result; it is redacted in place.
        :param annotations: annotations whose bounding boxes select the words.
        :param labels_to_redact: fields to redact; empty means every field.
        """
        self.ocr_result = ocr_result
        self.annotations = annotations
        self.labels_to_redact = labels_to_redact

    def redact(self):
        """Redact every selected annotation in words, lines and table cells."""
        refs = []
        for annot in self.annotations:
            if not self.labels_to_redact or annot.field in self.labels_to_redact:
                refs.extend(self.find_mapped_refs(annot))
        self.redact_words(refs)
        self.redact_lines(refs)
        # Set is faster than List in this case.
        self.redact_page_results(set(refs))

    def find_mapped_refs(self, annot: Annotation):
        """
        Return the references of the words matched by ``annot``.

        :param annot: the annotation to match against the OCR words.
        :returns: references as built by ``build_ref``.
        """
        refs = []
        read_results = self.ocr_result["analyzeResult"]["readResults"]
        for read_id, read_result in enumerate(read_results):
            # An annotation belongs to one page; a word at the same position
            # on another page must not be matched. Entries without a page
            # number are still matched purely by geometry.
            page = read_result.get("page")
            if page is not None and page != annot.page:
                continue

            lines: List[dict] = read_result["lines"]
            for line_id, line in enumerate(lines):
                # Early rejection.
                if not similar(
                    annot.bounding_box,
                    line["boundingBox"],
                    self.LINE_OVERLAP_THRESHOLD,
                ):
                    continue

                words: List[dict] = line["words"]
                for word_id, word in enumerate(words):
                    if similar(
                        annot.bounding_box,
                        word["boundingBox"],
                        self.WORD_OVERLAP_THRESHOLD,
                    ):
                        refs.append(self.build_ref(read_id, line_id, word_id))
        return refs

    def redact_words(self, refs: List[str]):
        """Redact the ``text`` of every referenced word."""

        def word_path(ref: str) -> str:
            # Remove leading '#'.
            return ref[1:]

        for ref in refs:
            r = word_path(ref)
            word = resolve_pointer(self.ocr_result, r)
            word["text"] = first_char(word["text"])
            set_pointer(self.ocr_result, r, word)

    def redact_lines(self, refs: List[str]):
        """Redact, inside each line ``text``, the token of every referenced word."""

        def line_path(ref: str) -> str:
            end = ref.find("/word")
            # Remove leading '#' and trailing word path.
            return ref[1:end]

        for ref in refs:
            r = line_path(ref)
            line = resolve_pointer(self.ocr_result, r)

            # The line text is split on single spaces and the token at the
            # word's index is redacted.
            tokens = line["text"].split(" ")
            word_id = int(ref.split("/")[-1])
            tokens[word_id] = first_char(tokens[word_id])
            line["text"] = " ".join(tokens)

            set_pointer(self.ocr_result, r, line)

    def redact_page_results(self, refs: Set[str]):
        """Redact, inside table cells, the tokens of the referenced words."""

        def add_analyze_layer(elem: str) -> str:
            # Cell elements omit the "analyzeResult" layer used by ``build_ref``.
            return elem.replace("#/", "#/analyzeResult/")

        # "pageResults" and "tables" are treated as optional: a result without
        # them simply has no table cell to redact.
        page_results = self.ocr_result["analyzeResult"].get("pageResults", [])
        for page_result in page_results:
            tables: List[dict] = page_result.get("tables", [])
            for table in tables:
                cells: List[dict] = table["cells"]
                for cell in cells:
                    elements: List[str] = cell["elements"]
                    for elem_id, element in enumerate(elements):
                        full_elem = add_analyze_layer(element)
                        if full_elem in refs:
                            tokens = cell["text"].split(" ")
                            tokens[elem_id] = first_char(tokens[elem_id])
                            cell["text"] = " ".join(tokens)

    @staticmethod
    def build_ref(read_id: int, line_id: int, word_id: int) -> str:
        """Build the reference of a word from its read/line/word indexes."""
        return f"#/analyzeResult/readResults/{read_id}/lines/{line_id}/words/{word_id}"
|
|
@ -0,0 +1,117 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from typing import List, Collection
|
||||
|
||||
from dacite import from_dict
|
||||
|
||||
from redact.types.annotation import Annotation
|
||||
from redact.types.span import Span
|
||||
from redact.utils.bounding_box_mapping import similar
|
||||
from redact.utils.redact_policy import first_char
|
||||
|
||||
|
||||
class OcrResultRedactionV3:
    """
    Redact annotated words inside a v3.0 OCR result.

    The OCR result dictionary is modified in place: matched words are redacted
    first, then their spans are used to redact the same text in the page
    lines, the document content and the table cells.
    """

    # Threshold passed to ``similar`` when matching a word to an annotation.
    WORD_OVERLAP_THRESHOLD = 0.98

    def __init__(
        self,
        ocr_result: dict,
        annotations: List[Annotation],
        labels_to_redact: Collection[str] = tuple(),
    ):
        """
        :param ocr_result: the parsed OCR result; it is redacted in place.
        :param annotations: annotations whose bounding boxes select the words.
        :param labels_to_redact: fields to redact; empty means every field.
        """
        self.ocr_result = ocr_result
        self.annotations = annotations
        self.labels_to_redact = labels_to_redact

    def redact(self):
        """Redact every selected annotation in words, lines, content and tables."""
        words_to_redact = self.find_words_to_redact()
        self.redact_words(words_to_redact)
        spans = [
            from_dict(data_class=Span, data=word["span"]) for word in words_to_redact
        ]
        self.redact_lines(spans)
        self.redact_content(spans)
        self.redact_table(spans)

    def find_words_to_redact(self):
        """
        Return the word dictionaries matched by the selected annotations.

        For each page and each selected annotation, at most one word is
        returned: the first one ``similar`` accepts.
        """
        # The label filter does not depend on the page, so apply it once.
        selected_annots = [
            annot
            for annot in self.annotations
            if not self.labels_to_redact or annot.field in self.labels_to_redact
        ]

        words_to_redact = []
        pages = self.ocr_result["analyzeResult"]["pages"]
        for page in pages:
            page_number = page.get("pageNumber")
            for annot in selected_annots:
                # An annotation belongs to one page; a word at the same
                # position on another page must not be matched. Pages without
                # a page number are still matched purely by geometry.
                if page_number is not None and page_number != annot.page:
                    continue
                for word in page["words"]:
                    if similar(
                        annot.bounding_box,
                        word["boundingBox"],
                        self.WORD_OVERLAP_THRESHOLD,
                    ):
                        words_to_redact.append(word)
                        break
        return words_to_redact

    def redact_words(self, words_to_redact):
        """Redact the ``content`` of every given word in place."""
        for word in words_to_redact:
            word["content"] = first_char(word["content"])

    def redact_lines(self, spans: List[Span]):
        """Redact each span inside the ``content`` of the line that contains it."""
        pages = self.ocr_result["analyzeResult"]["pages"]
        for redact_span in spans:
            line_to_redact = self.get_line_to_redact(pages, redact_span)
            if line_to_redact is not None:
                line_spans = Span.from_dict_list(line_to_redact["spans"])
                # Spans are document offsets; translate to an offset inside the line.
                relative_span = redact_span.relative_to(line_spans)
                line_to_redact["content"] = self.redact_text(
                    line_to_redact["content"], relative_span
                )

    def redact_content(self, spans: List[Span]):
        """Redact each span inside the top-level ``content`` string."""
        content = self.ocr_result["analyzeResult"]["content"]
        for span in spans:
            content = self.redact_text(content, span)
        self.ocr_result["analyzeResult"]["content"] = content

    def redact_table(self, spans: List[Span]):
        """Redact each span inside the ``content`` of the table cell that contains it."""
        # "tables" is treated as optional: a result without it simply has no
        # table cell to redact.
        tables = self.ocr_result["analyzeResult"].get("tables", [])
        for span in spans:
            cell_to_redact = self.get_cell_to_redact(tables, span)
            if cell_to_redact is not None:
                cell_spans = Span.from_dict_list(cell_to_redact["spans"])
                # Spans are document offsets; translate to an offset inside the cell.
                relative_span = span.relative_to(cell_spans)
                cell_to_redact["content"] = self.redact_text(
                    cell_to_redact["content"], relative_span
                )

    def get_line_to_redact(self, pages, redact_span: Span):
        """Return the first line whose spans contain ``redact_span``, or None."""
        for page in pages:
            for line in page["lines"]:
                line_spans = Span.from_dict_list(line["spans"])
                if redact_span.inside(line_spans):
                    return line
        return None

    def get_cell_to_redact(self, tables, span: Span):
        """Return the first table cell whose spans contain ``span``, or None."""
        for table in tables:
            for cell in table["cells"]:
                cell_spans = Span.from_dict_list(cell["spans"])
                if span.inside(cell_spans):
                    return cell
        return None

    @staticmethod
    def redact_text(content: str, span: Span) -> str:
        """Return ``content`` with the slice covered by ``span`` passed through ``first_char``."""
        left = span.offset
        right = span.offset + span.length

        pre = content[:left]
        text_to_redact = content[left:right]
        post = content[right:]

        redacted_text = first_char(text_to_redact)
        return pre + redacted_text + post
|
|
@ -9,5 +9,6 @@ from typing import List
|
|||
@dataclass
|
||||
class Annotation:
|
||||
bounding_box: List[float]
|
||||
page: int
|
||||
field: str
|
||||
text: str
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class ApiVersion(Enum):
    """Versions of the OCR result schema; each value is its version string."""

    V2_0 = "v2.0"
    V2_1 = "v2.1"
    V3_0 = "v3.0"
|
|
@ -7,9 +7,11 @@ import re
|
|||
from typing import List, Any
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class FileType(Enum):
|
||||
IMAGE_ONLY = ".+(\\.jpeg|\\.jpg|\\.tif|\\.tiff|\\.png|\\.bmp)$"
|
||||
PDF_ONLY = ".+(\\.pdf)$"
|
||||
SINGLE_PAGE_IMAGE = ".+(\\.jpeg|\\.jpg|\\.png|\\.bmp)$"
|
||||
MULTI_PAGE = ".+(\\.pdf|\\.tif|\\.tiff)$"
|
||||
|
||||
|
||||
@dataclass
|
||||
class FileBundle:
|
||||
|
@ -31,9 +33,12 @@ class FileBundle:
|
|||
ocr_file = img_file + ocr_suffix
|
||||
|
||||
if label_file in names and ocr_file in names:
|
||||
ret.append(FileBundle(
|
||||
image_file_name=img_file,
|
||||
fott_file_name=label_file,
|
||||
ocr_file_name=ocr_file))
|
||||
ret.append(
|
||||
FileBundle(
|
||||
image_file_name=img_file,
|
||||
fott_file_name=label_file,
|
||||
ocr_file_name=ocr_file,
|
||||
)
|
||||
)
|
||||
|
||||
return ret
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Dict, Tuple
|
||||
from types import MappingProxyType
|
||||
|
||||
from redact.types.annotation import Annotation
|
||||
|
||||
|
@ -13,7 +14,7 @@ class Entity:
|
|||
page: int
|
||||
text: str
|
||||
# camelCase instead of snake_case for aligning with the JSON schema.
|
||||
boundingBoxes: List[List[float]]
|
||||
boundingBoxes: List[List[float]] # noqa: N815
|
||||
|
||||
|
||||
@dataclass
|
||||
|
@ -26,7 +27,10 @@ class Label:
|
|||
class FottLabel:
|
||||
labels: List[Label]
|
||||
|
||||
def to_annotations(self, page_size: Dict[int, Tuple[float, float]] = {1: (1.0, 1.0)}) -> List[Annotation]:
|
||||
def to_annotations(
|
||||
self,
|
||||
page_size: Dict[int, Tuple[float, float]] = MappingProxyType({1: (1.0, 1.0)}),
|
||||
) -> List[Annotation]:
|
||||
def to_pixel(page: int, bounding_box: List[float]) -> List[float]:
|
||||
width = page_size[page][0]
|
||||
height = page_size[page][1]
|
||||
|
@ -44,7 +48,11 @@ class FottLabel:
|
|||
for entity in label.value:
|
||||
for bounding_box in entity.boundingBoxes:
|
||||
annot = Annotation(
|
||||
bounding_box=to_pixel(entity.page, bounding_box), field=label.label, text=entity.text)
|
||||
bounding_box=to_pixel(entity.page, bounding_box),
|
||||
field=label.label,
|
||||
text=entity.text,
|
||||
page=entity.page,
|
||||
)
|
||||
annotations.append(annot)
|
||||
|
||||
return annotations
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
|
||||
import dacite
|
||||
|
||||
|
||||
@dataclass
class Span:
    """A run of characters described by its start offset and its length."""

    offset: int
    length: int

    def includes(self, other: Span) -> bool:
        """Return True when ``other`` lies entirely within this span."""
        if other.offset < self.offset:
            return False
        return other.offset + other.length <= self.offset + self.length

    def inside(self, others: List[Span]) -> bool:
        """Return True when at least one span of ``others`` includes this span."""
        for candidate in others:
            if candidate.includes(self):
                return True
        return False

    def relative_to(self, others: List[Span]) -> Span:
        """
        Return this span with its offset expressed relative to ``others``.

        The new offset is the summed length of the spans that come before the
        first including span, plus the distance from that span's start.

        :raises ValueError: if no span of ``others`` includes this span.
        """
        preceding_length = 0
        for candidate in others:
            if candidate.includes(self):
                return Span(
                    offset=preceding_length + (self.offset - candidate.offset),
                    length=self.length,
                )
            preceding_length += candidate.length
        raise ValueError("Self span is not inside target span list.")

    @staticmethod
    def from_dict(data: dict) -> Span:
        """Build a ``Span`` from a dictionary."""
        return dacite.from_dict(data_class=Span, data=data)

    @staticmethod
    def from_dict_list(data: List[dict]) -> List[Span]:
        """Build a list of ``Span`` from a list of dictionaries, keeping the order."""
        return list(map(Span.from_dict, data))
|
|
@ -9,7 +9,11 @@ from shapely.geometry import Polygon
|
|||
OVERLAP_THRESHOLD = 0.5
|
||||
|
||||
|
||||
def similar(bounding_box_a: List[float], bounding_box_b: List[float], threshold=OVERLAP_THRESHOLD) -> bool:
|
||||
def similar(
|
||||
bounding_box_a: List[float],
|
||||
bounding_box_b: List[float],
|
||||
threshold=OVERLAP_THRESHOLD,
|
||||
) -> bool:
|
||||
a = Polygon(pairwise(bounding_box_a))
|
||||
b = Polygon(pairwise(bounding_box_b))
|
||||
base_area = min(a.area, b.area)
|
||||
|
@ -20,6 +24,6 @@ def similar(bounding_box_a: List[float], bounding_box_b: List[float], threshold=
|
|||
def pairwise(elements: List[float]) -> List[Tuple[float, float]]:
    """Group a flat list into consecutive, non-overlapping 2-tuples.

    ``[a, b, c, d]`` becomes ``[(a, b), (c, d)]``. ``similar`` feeds the
    result to ``Polygon``, so each pair is used as one vertex.

    Args:
        elements: Flat list with an even number of items.

    Returns:
        The list of pairs, in input order. Empty input yields ``[]``.

    Raises:
        IndexError: If ``elements`` has an odd number of items.
    """
    return [(elements[i], elements[i + 1]) for i in range(0, len(elements), 2)]
|
||||
|
|
|
@ -5,21 +5,40 @@
|
|||
import re
|
||||
|
||||
|
||||
def valid_url(url: str) -> bool:
    """Return True if ``url`` looks like an http(s)/ftp(s) URL.

    Args:
        url: The string to validate.

    Returns:
        True when the whole string matches the pattern below, else False.
    """
    # This is copied from the Django URL validation regex.
    # Source: https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
    regex = re.compile(
        r"^(?:http|ftp)s?://"  # http:// or https://
        # domain...
        r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|"
        r"localhost|"  # localhost...
        r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or ip
        r"(?::\d+)?"  # optional port
        r"(?:/?|[/?]\S+)$",
        re.IGNORECASE,
    )
    # re.match yields a Match object or None; convert so the declared
    # ``bool`` return type is actually honoured.
    return regex.match(url) is not None
|
||||
|
||||
|
||||
def get_redacted_file_name(name: str) -> str:
    """Return ``name`` with ``redacted_`` prepended.

    Splitting on ".", prefixing the first token and re-joining with "." is
    the same as prefixing the whole string, so the prefix is applied directly.
    """
    return "redacted_" + name
|
||||
|
||||
|
||||
def get_page_file_name(name: str, page: int, suffix: str = None) -> str:
    """Build a per-page file name from ``name`` and a page number.

    The page number is zero-padded to at least three digits and appended
    after a dot; ``suffix`` is appended verbatim when it is not None.
    """
    page_name = name + "." + str(page).zfill(3)
    if suffix is None:
        return page_name
    return page_name + suffix
|
||||
|
||||
|
||||
def is_pdf(name: str) -> bool:
    """Return True if ``name`` ends with ``.pdf`` after at least one character.

    The check is case-sensitive.
    """
    # re.match yields a Match object or None; convert so the declared
    # ``bool`` return type is actually honoured.
    return re.match(".+(\\.pdf)$", name) is not None
|
||||
|
||||
|
||||
def is_tiff(name: str) -> bool:
    """Return True if ``name`` ends with ``.tif`` or ``.tiff``.

    At least one character must precede the extension; the check is
    case-sensitive.
    """
    # re.match yields a Match object or None; convert so the declared
    # ``bool`` return type is actually honoured.
    return re.match(".+(\\.tiff?)$", name) is not None
|
||||
|
|
|
@ -15,10 +15,9 @@ def first_char(item: str) -> str:
|
|||
|
||||
# This also takes care of other common letters in European languages (Ø) and
|
||||
# linguistic ligatures (Œ) instead of just A-Z.
|
||||
ret = re.sub('[A-ZØÞŁꜲÆꜴꜶꜸꜺꜼǶŒꝎẞꜨꝠ]', 'A', ret)
|
||||
ret = re.sub('[a-zøþıłꜳæꬱꜵꜷꜹꜻꜽ🙰ꭁƕỻœꝏßꜩꝡ]', 'a', ret)
|
||||
ret = re.sub('[0-9]', '0', ret)
|
||||
return ret
|
||||
ret = re.sub("[A-ZØÞŁꜲÆꜴꜶꜸꜺꜼǶŒꝎẞꜨꝠ]", "A", ret)
|
||||
ret = re.sub("[a-zøþıłꜳæꬱꜵꜷꜹꜻꜽ🙰ꭁƕỻœꝏßꜩꝡ]", "a", ret)
|
||||
return re.sub("[0-9]", "0", ret)
|
||||
|
||||
|
||||
def remove_diacritics(input_str: str) -> str:
|
||||
|
@ -35,5 +34,5 @@ def remove_diacritics(input_str: str) -> str:
|
|||
Returns:
|
||||
str: The string without diacritics and typographical ligatures.
|
||||
"""
|
||||
nfkd_form = unicodedata.normalize('NFKD', input_str)
|
||||
return u"".join([c for c in nfkd_form if not unicodedata.combining(c)])
|
||||
nfkd_form = unicodedata.normalize("NFKD", input_str)
|
||||
return "".join([c for c in nfkd_form if not unicodedata.combining(c)])
|
||||
|
|
Двоичные данные
scripts/redact_cli_py/requirements.txt
Двоичные данные
scripts/redact_cli_py/requirements.txt
Двоичный файл не отображается.
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 235 KiB |
Двоичный файл не отображается.
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Двоичный файл не отображается.
1233
scripts/redact_cli_py/testdata/testdata.jpg.2021-09-30-preview.ocr.json
поставляемый
Normal file
1233
scripts/redact_cli_py/testdata/testdata.jpg.2021-09-30-preview.ocr.json
поставляемый
Normal file
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
1232
scripts/redact_cli_py/testdata/testdata.jpg.2021-09-30-preview.redacted.ocr.json
поставляемый
Normal file
1232
scripts/redact_cli_py/testdata/testdata.jpg.2021-09-30-preview.redacted.ocr.json
поставляемый
Normal file
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -9,29 +9,63 @@ from redact.types.annotation import Annotation
|
|||
|
||||
class AnnotationFactory:
    """Builds fixed Annotation lists used as test fixtures."""

    # The builders take no ``self``; marking them static makes that explicit
    # and keeps them callable from an instance as well as from the class.
    @staticmethod
    def build_annotations() -> List[Annotation]:
        """Return the Name, Date and Total annotations, all on page 1."""
        return [
            Annotation(
                bounding_box=[375.0, 739.0, 517.0, 738.0, 517.0, 782.0, 375.0, 781.0],
                field="Name",
                text="Aenean",
                page=1,
            ),
            Annotation(
                bounding_box=[
                    1265.0,
                    1091.0,
                    1495.0,
                    1090.0,
                    1494.0,
                    1132.0,
                    1267.0,
                    1134.0,
                ],
                field="Date",
                text="1900/01/01",
                page=1,
            ),
            Annotation(
                bounding_box=[
                    1260.0,
                    1165.0,
                    1445.9999999999998,
                    1165.0,
                    1445.0,
                    1210.0,
                    1261.0,
                    1212.0,
                ],
                field="Total",
                text="$3000.00",
                page=1,
            ),
        ]

    @staticmethod
    def build_annotations_mode_1() -> List[Annotation]:
        """Return Name, Date and Total annotations with empty text.

        Bounding boxes here use small integer coordinates.
        NOTE(review): presumably these match a mode "1" test image - confirm.
        """
        return [
            Annotation(
                bounding_box=[76, 105, 104, 105, 104, 111, 76, 111],
                field="Name",
                text="",
                page=1,
            ),
            Annotation(
                bounding_box=[255, 155, 301, 155, 301, 161, 255, 162],
                field="Date",
                text="",
                page=1,
            ),
            Annotation(
                bounding_box=[254, 166, 291, 166, 291, 172, 254, 173],
                field="Total",
                text="",
                page=1,
            ),
        ]
|
||||
|
|
|
@ -10,39 +10,34 @@ from redact.types.fott_label import FottLabel
|
|||
|
||||
|
||||
class FottLabelFactory:
    """Loads FottLabel fixtures from the JSON files under ``testdata/``."""

    @staticmethod
    def _load(fott_label_path: str) -> FottLabel:
        """Read one label JSON file and convert it into a FottLabel."""
        # utf-8-sig also accepts files that start with a UTF-8 BOM.
        with open(fott_label_path, encoding="utf-8-sig") as fott_label_json:
            fott_label_dict = json.load(fott_label_json)
            return from_dict(data_class=FottLabel, data=fott_label_dict)

    @staticmethod
    def build() -> FottLabel:
        """Load ``testdata/testdata.jpg.labels.json``."""
        return FottLabelFactory._load("testdata/testdata.jpg.labels.json")

    @staticmethod
    def build_redacted() -> FottLabel:
        """Load ``testdata/testdata.redacted.labels.json``."""
        return FottLabelFactory._load("testdata/testdata.redacted.labels.json")

    @staticmethod
    def build_partial() -> FottLabel:
        """Load ``testdata/testdata-partial.jpg.labels.json``."""
        return FottLabelFactory._load("testdata/testdata-partial.jpg.labels.json")

    @staticmethod
    def build_redacted_partial() -> FottLabel:
        """Load ``testdata/testdata-partial.redacted.labels.json``."""
        return FottLabelFactory._load(
            "testdata/testdata-partial.redacted.labels.json"
        )
|
||||
|
|
|
@ -6,7 +6,6 @@ from PIL import Image
|
|||
|
||||
|
||||
class ImageFactory:
|
||||
|
||||
@staticmethod
|
||||
def build() -> Image:
|
||||
image_path = "testdata/testdata.jpg"
|
||||
|
|
|
@ -6,35 +6,44 @@ import json
|
|||
|
||||
|
||||
class OcrResultFactory:
    """Loads OCR result fixtures (as dicts) from JSON files under ``testdata/``."""

    @staticmethod
    def _load(ocr_result_path: str) -> dict:
        """Read one OCR result JSON file and return the parsed content."""
        # utf-8-sig also accepts files that start with a UTF-8 BOM.
        with open(ocr_result_path, encoding="utf-8-sig") as ocr_result_json:
            return json.load(ocr_result_json)

    @staticmethod
    def build() -> dict:
        """Load ``testdata/testdata.jpg.ocr.json``."""
        return OcrResultFactory._load("testdata/testdata.jpg.ocr.json")

    @staticmethod
    def build_redacted() -> dict:
        """Load ``testdata/testdata.redacted.ocr.json``."""
        return OcrResultFactory._load("testdata/testdata.redacted.ocr.json")

    @staticmethod
    def build_partial() -> dict:
        """Load ``testdata/testdata-partial.jpg.ocr.json``."""
        return OcrResultFactory._load("testdata/testdata-partial.jpg.ocr.json")

    @staticmethod
    def build_redacted_partial() -> dict:
        """Load ``testdata/testdata-partial.redacted.ocr.json``."""
        return OcrResultFactory._load("testdata/testdata-partial.redacted.ocr.json")

    @staticmethod
    def build_2021_09_30_preview() -> dict:
        """Load ``testdata/testdata.jpg.2021-09-30-preview.ocr.json``."""
        return OcrResultFactory._load(
            "testdata/testdata.jpg.2021-09-30-preview.ocr.json"
        )

    @staticmethod
    def build_redacted_2021_09_30_preview() -> dict:
        """Load ``testdata/testdata.jpg.2021-09-30-preview.redacted.ocr.json``."""
        return OcrResultFactory._load(
            "testdata/testdata.jpg.2021-09-30-preview.redacted.ocr.json"
        )
|
||||
|
|
|
@ -42,7 +42,7 @@ class TestImageRedaction:
|
|||
expected_image = ImageFactory.build_redacted_partial()
|
||||
annotations = AnnotationFactory.build_annotations()
|
||||
|
||||
image_redaction = ImageRedaction(image, annotations, ["Name","Date"])
|
||||
image_redaction = ImageRedaction(image, annotations, ["Name", "Date"])
|
||||
image_redaction.redact()
|
||||
|
||||
diff = ImageChops.difference(image_redaction.image, expected_image)
|
||||
|
|
|
@ -2,37 +2,62 @@
|
|||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
import pytest
|
||||
|
||||
from redact.redaction.ocr_result_redaction import OcrResultRedaction
|
||||
from redact.redaction.ocr_result_redaction_v2 import OcrResultRedactionV2
|
||||
from redact.redaction.ocr_result_redaction_v3 import OcrResultRedactionV3
|
||||
from redact.types.api_version import ApiVersion
|
||||
from tests.factories.ocr_result_factory import OcrResultFactory
|
||||
from tests.factories.annotation_factory import AnnotationFactory
|
||||
|
||||
|
||||
class TestOcrResultRedaction:
    """Checks that OcrResultRedaction agrees with the version-specific redactors."""

    @pytest.mark.parametrize(
        "api_version",
        [
            ApiVersion.V2_0,
            ApiVersion.V2_1,
        ],
    )
    def test_redact_v2(self, api_version) -> None:
        """OcrResultRedaction with a v2.x version must match OcrResultRedactionV2."""
        # Each redactor gets its own freshly built inputs.
        # NOTE(review): if a redactor keeps a reference to the dict it is
        # given and edits it in place, sharing one dict would make the final
        # assertion compare an object with itself - confirm against the
        # redaction classes.
        ocr_result_redaction = OcrResultRedaction(
            OcrResultFactory.build(),
            AnnotationFactory.build_annotations(),
            api_version,
        )
        ocr_result_redaction.redact()

        v2 = OcrResultRedactionV2(
            OcrResultFactory.build(),
            AnnotationFactory.build_annotations(),
        )
        v2.redact()

        assert ocr_result_redaction.ocr_result == v2.ocr_result

    @pytest.mark.parametrize(
        "api_version",
        [
            ApiVersion.V3_0,
        ],
    )
    def test_redact_v3(self, api_version) -> None:
        """OcrResultRedaction with the v3.0 version must match OcrResultRedactionV3."""
        # Separate inputs per redactor, for the same reason as in
        # test_redact_v2.
        ocr_result_redaction = OcrResultRedaction(
            OcrResultFactory.build_2021_09_30_preview(),
            AnnotationFactory.build_annotations(),
            api_version,
        )
        ocr_result_redaction.redact()

        v3 = OcrResultRedactionV3(
            OcrResultFactory.build_2021_09_30_preview(),
            AnnotationFactory.build_annotations(),
        )
        v3.redact()

        assert ocr_result_redaction.ocr_result == v3.ocr_result
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from redact.redaction.ocr_result_redaction_v2 import OcrResultRedactionV2
|
||||
from tests.factories.ocr_result_factory import OcrResultFactory
|
||||
from tests.factories.annotation_factory import AnnotationFactory
|
||||
|
||||
|
||||
class TestOcrResultRedactionV2:
    """Tests for OcrResultRedactionV2 against the v2 fixture files."""

    def test_ctor(self) -> None:
        """A new redaction exposes an ``ocr_result`` equal to its input."""
        source = OcrResultFactory.build()
        redaction = OcrResultRedactionV2(source, AnnotationFactory.build_annotations())

        assert redaction.ocr_result == source

    def test_redact(self) -> None:
        """Redacting with every annotation yields the fully redacted fixture."""
        source = OcrResultFactory.build()
        expected = OcrResultFactory.build_redacted()
        redaction = OcrResultRedactionV2(source, AnnotationFactory.build_annotations())

        redaction.redact()

        assert redaction.ocr_result == expected

    def test_redact_partial(self) -> None:
        """Passing ["Name", "Date"] yields the partially redacted fixture."""
        source = OcrResultFactory.build_partial()
        expected = OcrResultFactory.build_redacted_partial()
        redaction = OcrResultRedactionV2(
            source,
            AnnotationFactory.build_annotations(),
            ["Name", "Date"],
        )

        redaction.redact()

        assert redaction.ocr_result == expected
|
|
@ -0,0 +1,30 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from redact.redaction.ocr_result_redaction_v3 import OcrResultRedactionV3
|
||||
from tests.factories.ocr_result_factory import OcrResultFactory
|
||||
from tests.factories.annotation_factory import AnnotationFactory
|
||||
|
||||
|
||||
class TestOcrResultRedactionV3:
    """Tests for OcrResultRedactionV3 against the 2021-09-30-preview fixtures."""

    def test_ctor(self) -> None:
        """A new redaction exposes an ``ocr_result`` equal to its input."""
        source = OcrResultFactory.build_2021_09_30_preview()
        redaction = OcrResultRedactionV3(source, AnnotationFactory.build_annotations())

        assert redaction.ocr_result == source

    def test_redact(self) -> None:
        """Redacting with every annotation yields the redacted preview fixture."""
        source = OcrResultFactory.build_2021_09_30_preview()
        expected = OcrResultFactory.build_redacted_2021_09_30_preview()
        redaction = OcrResultRedactionV3(source, AnnotationFactory.build_annotations())

        redaction.redact()

        assert redaction.ocr_result == expected
|
|
@ -2,13 +2,13 @@
|
|||
# Licensed under the MIT License. See License.txt in the project
|
||||
# root for license information.
|
||||
|
||||
from PIL import ImageChops, ImageStat, Image
|
||||
from PIL import ImageChops, ImageStat
|
||||
|
||||
from redact.utils.pdf_renderer import PdfRenderer
|
||||
from redact.preprocess.pdf_renderer import PdfRenderer
|
||||
from tests.factories.image_factory import ImageFactory
|
||||
|
||||
class TestPdfRendering:
|
||||
|
||||
class TestPdfRendering:
|
||||
def test_rendering(self) -> None:
|
||||
# A small tolerance epsilon because of the jpg compression loss.
|
||||
epsilon = 0.1
|
||||
|
|
|
@ -8,17 +8,16 @@ from redact.types.file_bundle import FileType
|
|||
|
||||
class TestFileBundle:
|
||||
def test_from_names(self) -> None:
    """from_names with SINGLE_PAGE_IMAGE returns only the a.jpg bundle.

    "dummy_file.jpg" is listed without label or OCR files and is absent
    from the expected result.
    """
    candidate_names = ["a.jpg", "a.jpg.labels.json", "dummy_file.jpg", "a.jpg.ocr.json"]
    bundle = FileBundle(
        image_file_name="a.jpg",
        fott_file_name="a.jpg.labels.json",
        ocr_file_name="a.jpg.ocr.json",
    )

    assert FileBundle.from_names(candidate_names, FileType.SINGLE_PAGE_IMAGE) == [bundle]
|
||||
|
||||
|
@ -30,12 +29,16 @@ class TestFileBundle:
|
|||
"a.jpg",
|
||||
"a.jpg.labels.json",
|
||||
"dummy_file.pdf",
|
||||
"a.pdf.ocr.json"]
|
||||
expected = [FileBundle(
|
||||
image_file_name="a.pdf",
|
||||
fott_file_name="a.pdf.labels.json",
|
||||
ocr_file_name="a.pdf.ocr.json")]
|
||||
"a.pdf.ocr.json",
|
||||
]
|
||||
expected = [
|
||||
FileBundle(
|
||||
image_file_name="a.pdf",
|
||||
fott_file_name="a.pdf.labels.json",
|
||||
ocr_file_name="a.pdf.ocr.json",
|
||||
)
|
||||
]
|
||||
|
||||
actual = FileBundle.from_names(names, FileType.PDF_ONLY)
|
||||
actual = FileBundle.from_names(names, FileType.MULTI_PAGE)
|
||||
|
||||
assert actual == expected
|
||||
|
|
|
@ -24,8 +24,10 @@ class TestFottLabel:
|
|||
bbox = annotations[0].bounding_box
|
||||
for i, element in enumerate(bbox):
|
||||
bbox[i] = element * 10
|
||||
annotations[0].page = 2
|
||||
|
||||
actual = fott_label.to_annotations(
|
||||
page_size={1: (2481, 3509), 2: (24810, 35090)})
|
||||
page_size={1: (2481, 3509), 2: (24810, 35090)}
|
||||
)
|
||||
|
||||
assert actual == annotations
|
||||
|
|
|
@ -11,7 +11,7 @@ class TestRedactPolicy:
|
|||
actual = first_char(text)
|
||||
assert "" == actual
|
||||
|
||||
def test_first_char_apple(self) -> None:
    """first_char maps "Apple" to "Aaaaa"."""
    assert "Aaaaa" == first_char("Apple")
|
||||
|
|
Загрузка…
Ссылка в новой задаче