Merge pull request #655 from microsoft/staging
Syncing staging <> master branches
@ -10,7 +10,8 @@ Each project should live in its own subdirectory ```/contrib/<project>``` and co
| Directory | Project description | Build status (optional) |
|---|---|---|
| [Crowd counting](crowd_counting) | Counting the number of people in low-crowd-density (e.g. fewer than 10 people) and high-crowd-density (e.g. thousands of people) scenarios. | [![Build Status](https://dev.azure.com/team-sharat/crowd-counting/_apis/build/status/lixzhang.cnt?branchName=lixzhang%2Fsubmodule-rev3)](https://dev.azure.com/team-sharat/crowd-counting/_build/latest?definitionId=49&branchName=lixzhang%2Fsubmodule-rev3)|
| [Action Recognition with I3D](action_recognition) | Action recognition to identify, in video/webcam footage, which actions are performed (e.g. "running", "opening a bottle") and their respective start/end times. Please note that we also have an R(2+1)D implementation of action recognition, which you can find under [scenarios](../scenarios). | |
| [Document Image Binarization](binarization) | Binarization is a technique to segment foreground from background pixels. A simple binarization technique is thresholding of gray-level or color scanned document images. | |
| [Document Image Cleanup](document_cleanup) | Given a noisy input document image, the aim of document image cleanup is to improve its readability and visibility by removing the noisy elements. | |

## Tools

| Directory | Project description | Build status (optional) |

@ -1,17 +0,0 @@
# Binarization

Binarization is a technique to segment foreground from background pixels. A simple binarization technique is thresholding of gray-level or color scanned document images.

## At a glance

This technique is an improvement over Sauvola's binarization: it preserves more foreground information in the binarized document images. To achieve this, we introduce a confidence score for the background pixels.

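For orientation, the sketch below shows the classical Sauvola thresholding rule that this work builds on. The window size and the `k`/`R` constants are assumed defaults, and the confidence score for background pixels introduced in this contribution is not reproduced here (see the `confidence_based_Sauvola_binarization` directory for the actual implementation).

```python
# A minimal sketch of classical Sauvola thresholding (not the confidence-based
# variant contributed here). window_size, k and R are assumed defaults.
import cv2
import numpy as np

def sauvola_binarize(gray, window_size=25, k=0.2, R=128.0):
    gray = gray.astype(np.float64)
    # Local mean and standard deviation over a window_size x window_size neighborhood.
    mean = cv2.boxFilter(gray, ddepth=-1, ksize=(window_size, window_size))
    sq_mean = cv2.boxFilter(gray * gray, ddepth=-1, ksize=(window_size, window_size))
    std = np.sqrt(np.maximum(sq_mean - mean * mean, 0))
    # Sauvola threshold: T = m * (1 + k * (s / R - 1)).
    threshold = mean * (1.0 + k * (std / R - 1.0))
    # Pixels brighter than the local threshold become background (white).
    return (gray > threshold).astype(np.uint8) * 255

# Example (hypothetical call):
# binary = sauvola_binarize(cv2.imread("./confidence_based_Sauvola_binarization/test_images/2.jpeg", cv2.IMREAD_GRAYSCALE))
```
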
### Input images

<img src="./confidence_based_Sauvola_binarization/test_images/2.jpeg" width="33%"> </img>
<img src="./confidence_based_Sauvola_binarization/test_images/10.jpeg" width="33%"> </img>
<img src="./confidence_based_Sauvola_binarization/test_images/new1.jpg" width="33%"> </img>

### Binary outputs

<img src="./confidence_based_Sauvola_binarization/results/2_bin_new.png" width="33%"> </img>
<img src="./confidence_based_Sauvola_binarization/results/10_bin_new.png" width="33%"> </img>
<img src="./confidence_based_Sauvola_binarization/results/new1_bin_new.png" width="33%"> </img>

@ -0,0 +1,16 @@
# Document Image Cleanup

Given a noisy input document image, the aim of document image cleanup is to improve its readability and visibility by removing the noisy elements.

## Example of document image cleanup

### Noisy input images

<img src="./light_weight_document_cleanup_ICDAR2021/sample_input_output/book_org.jpg" width="33%"> </img>
<img src="./light_weight_document_cleanup_ICDAR2021/sample_input_output/writing_org.jpg" width="33%"> </img>
<img src="./confidence_based_Sauvola_binarization/test_images/2.jpeg" width="33%"> </img>

### Cleaned-up images

<img src="./light_weight_document_cleanup_ICDAR2021/sample_input_output/book_dnn.jpg" width="33%"> </img>
<img src="./light_weight_document_cleanup_ICDAR2021/sample_input_output/writing_dnn.jpg" width="33%"> </img>
<img src="./confidence_based_Sauvola_binarization/results/2_bin_new.png" width="33%"> </img>
@ -0,0 +1,139 @@
import cv2
import os
import random
import numpy as np
from random import randint
import albumentations as A
import sys
from tqdm import tqdm

from utils import GetOverlappingBlocks, getListOfFiles, ImageResize


# Augmentation pipeline that simulates camera noise, compression, blur and
# lighting artifacts on the noisy input blocks.
transform = A.Compose([
    A.OneOf([
        A.ISONoise(p=0.4),
        A.JpegCompression(quality_lower=50, quality_upper=70, always_apply=False, p=0.8),
    ], p=0.6),
    A.OneOf([
        A.MotionBlur(blur_limit=10, p=0.8),
        A.MedianBlur(blur_limit=3, p=0.75),
        A.GaussianBlur(blur_limit=7, p=0.75),
    ], p=0.8),
    A.OneOf([
        A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.75),
        A.RandomShadow(num_shadows_lower=1, num_shadows_upper=18, shadow_dimension=6, p=0.85),
    ], p=0.8),
])


def GenerateTrainingBlocks(data_folder, gt_folder, dataset_path='./dataset', M=256, N=256):
    """Cut every (input, ground-truth) image pair into overlapping M x N training blocks."""
    print(data_folder)
    print('Generating training blocks!!!')
    train_path = dataset_path + '/' + data_folder + '_Trainblocks'

    if not os.path.exists(train_path):
        os.makedirs(train_path)

    train_filenames = train_path + '/train_block_names.txt'
    f = open(train_filenames, 'w')

    data_path = dataset_path + '/' + data_folder
    gt_path = dataset_path + '/' + gt_folder

    print(data_path)

    filenames = getListOfFiles(data_path)
    cnt = 0
    print(filenames)
    for name in tqdm(filenames):
        print(name)
        gt_filename = gt_path + '/' + name
        in_filename = data_path + '/' + name
        print(gt_filename)
        print(in_filename)
        gt_image_initial = cv2.imread(gt_filename)
        in_image_initial = cv2.imread(in_filename)
        print(gt_image_initial.shape, in_image_initial.shape)
        # Multi-scale cropping: each image pair is processed at three scales.
        for scale in [0.7, 1.0, 1.4]:
            gt_image = ImageResize(gt_image_initial, scale)
            in_image = ImageResize(in_image_initial, scale)
            h, w, c = in_image.shape
            gt_img = GetOverlappingBlocks(gt_image, Part=8)
            in_img = GetOverlappingBlocks(in_image, Part=8)
            # Save the regular overlapping blocks without augmentation.
            for i in range(len(gt_img)):
                train_img_path = train_path + '/block_' + str(cnt) + '.png'
                gt_img_path = train_path + '/gtblock_' + str(cnt) + '.png'
                cv2.imwrite(train_img_path, in_img[i])
                #cv2.imwrite(train_img_path, PreProcessInput(in_img[i]))
                cv2.imwrite(gt_img_path, gt_img[i])
                t_name = 'block_' + str(cnt) + '.png'
                f.write(t_name)
                f.write('\n')
                cnt += 1
            # Additionally sample random crops and push them through the augmentation pipeline.
            Random_Block_Number_PerImage = int(len(gt_img) / 5)
            for i in range(Random_Block_Number_PerImage):
                if in_image.shape[0] - M > 1 and in_image.shape[1] - N > 1:
                    y = random.randint(1, in_image.shape[0] - M)
                    x = random.randint(1, in_image.shape[1] - N)
                    in_part_img = in_image[y:y + M, x:x + N, :].copy()
                    gt_part_img = gt_image[y:y + M, x:x + N, :].copy()
                    train_img_path = train_path + '/block_' + str(cnt) + '.png'
                    gt_img_path = train_path + '/gtblock_' + str(cnt) + '.png'
                    # albumentations expects RGB, OpenCV loads BGR.
                    in_part_img = cv2.cvtColor(in_part_img, cv2.COLOR_BGR2RGB)
                    augmented_image = transform(image=in_part_img)['image']
                    augmented_image = cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR)

                    cv2.imwrite(train_img_path, augmented_image)
                    cv2.imwrite(gt_img_path, gt_part_img)
                    t_name = 'block_' + str(cnt) + '.png'
                    f.write(t_name)
                    f.write('\n')
                    cnt += 1
                else:
                    break
            # For images smaller than the block size along one axis, pad a white
            # M x N block and copy in whatever part of the image fits.
            in_part_img = np.zeros((M, N, 3), dtype=np.uint8)
            gt_part_img = np.zeros((M, N, 3), dtype=np.uint8)
            in_part_img[:, :, :] = 255
            gt_part_img[:, :, :] = 255

            if in_image.shape[0] - M <= 1 and in_image.shape[1] - N > 1:
                y = 0
                x = random.randint(1, in_image.shape[1] - N)
                in_part_img[:h, :, :] = in_image[:, x:x + N, :].copy()
                gt_part_img[:h, :, :] = gt_image[:, x:x + N, :].copy()
            if in_image.shape[0] - M > 1 and in_image.shape[1] - N <= 1:
                x = 0
                y = random.randint(1, in_image.shape[0] - M)
                in_part_img[:, :w, :] = in_image[y:y + M, :, :].copy()
                gt_part_img[:, :w, :] = gt_image[y:y + M, :, :].copy()

            train_img_path = train_path + '/block_' + str(cnt) + '.png'
            gt_img_path = train_path + '/gtblock_' + str(cnt) + '.png'
            in_part_img = cv2.cvtColor(in_part_img, cv2.COLOR_BGR2RGB)
            augmented_image = transform(image=in_part_img)['image']
            augmented_image = cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR)

            cv2.imwrite(train_img_path, augmented_image)
            cv2.imwrite(gt_img_path, gt_part_img)
            t_name = 'block_' + str(cnt) + '.png'
            f.write(t_name)
            f.write('\n')
            cnt += 1
            #print(cnt)

    f.close()

    print('Total number of training blocks generated: ', cnt)

    return train_path, train_filenames
@ -0,0 +1,42 @@
# Document Image Cleanup

Given a noisy input document image, the aim of document image cleanup is to improve its readability and visibility by removing the noisy elements.

## Light-weight Document Image Cleanup using Perceptual Loss

Smartphones have enabled effortless capturing and sharing of documents in digital form. The documents, however, often undergo various types of degradation due to aging, stains, or shortcomings of the capturing environment such as shadow, non-uniform lighting, etc., which reduce the comprehensibility of the document images. In this work, we consider the problem of document image cleanup on embedded applications such as smartphone apps, which usually have memory, energy, and latency limitations due to the device and/or for the best human user experience. We propose a light-weight encoder-decoder based convolutional neural network architecture for removing noisy elements from document images. To compensate for the generalization performance of a low-capacity network, we incorporate a perceptual loss, for knowledge transfer from a pre-trained deep CNN, into our loss function. In terms of the number of parameters and product-sum operations, our models are 65-1030 and 3-27 times, respectively, smaller than existing state-of-the-art document enhancement models. Overall, the proposed models offer a favorable resource versus accuracy trade-off, and we empirically illustrate the efficacy of our approach on several real-world benchmark datasets.

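For reference, the perceptual term implemented in `loss_function.py` later in this change combines a content loss and a style (Gram-matrix) loss computed on VGG19 feature maps $\phi_j$, roughly of the form

$$
\mathcal{L}_{perc} \;=\; \frac{w_s}{|S|}\sum_{j \in S} \operatorname{mean}\bigl|G(\phi_j(\hat y)) - G(\phi_j(y))\bigr| \;+\; \frac{w_c}{|C|}\sum_{j \in C} \operatorname{mean}\bigl|\phi_j(\hat y) - \phi_j(y)\bigr| ,
$$

where $\hat y$ is the network output, $y$ the ground truth, $G(\cdot)$ the Gram matrix, and $S$, $C$ the chosen style and content layers. This term is added to a pixel-wise loss in gray or RGB space; the weights $w_s$ and $w_c$ correspond to `style_weight` and `content_weight` in the code.
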
### Cite

https://link.springer.com/chapter/10.1007/978-3-030-86334-0_16

@InProceedings{10.1007/978-3-030-86334-0_16, author="Dey, Soumyadeep and Jawanpuria, Pratik", editor="Llad{\'o}s, Josep and Lopresti, Daniel and Uchida, Seiichi", title="Light-Weight Document Image Cleanup Using Perceptual Loss", booktitle="Document Analysis and Recognition -- ICDAR 2021", year="2021", publisher="Springer International Publishing", address="Cham", pages="238--253", isbn="978-3-030-86334-0" }

### Noisy input images

<img src="./sample_input_output/book_org.jpg" width="33%"> </img>
<img src="./sample_input_output/pres1_org.jpg" width="33%"> </img>
<img src="./sample_input_output/bill_org.jpg" width="33%"> </img>

### Cleaned-up images

<img src="./sample_input_output/book_dnn.jpg" width="33%"> </img>
<img src="./sample_input_output/pres1_dnn.jpg" width="33%"> </img>
<img src="./sample_input_output/bill_dnn.jpg" width="33%"> </img>

## Setup

### Dependencies
- python 3.7
- numpy 1.16
- opencv 4.2
- skimage 0.17
- tensorflow 2.4
- albumentations
- tqdm
- scikit-learn

### Example

A sample example of the usage (training and testing) of the proposed cleanup technique is available in this [notebook](./DocumentCleanup_ICDAR2021.ipynb).

Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/dataset/sample_data/image_42.png (4.2 MiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/dataset/sample_data/image_56.png (3.5 MiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/dataset/sample_gt_data/image_42.png (135 KiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/dataset/sample_gt_data/image_56.png (50 KiB)
@ -0,0 +1,115 @@
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.models import model_from_json
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D

import os
from model import convert2gray
from utils import GetOverlappingBlocks, CombineToImage, load_tf_img, getListOfFiles
from tqdm import tqdm
import cv2
import numpy as np

#os.environ["CUDA_VISIBLE_DEVICES"] = '0'
#gpu_devices = tf.config.experimental.list_physical_devices('GPU')
#tf.config.experimental.set_memory_growth(gpu_devices[0], True)


def prepare_data_blocks(blocks, size):
    # Convert a list of image blocks into normalized tensors of the given size.
    data = []
    for block in blocks:
        data.append(load_tf_img(block, size))
    return data


def infer(model_name, model_weight, target_dir, save_out_dir, block_size=(256, 256), batch_size=1):
    """Run the cleanup model on every image in target_dir and save the outputs to save_out_dir."""
    # Load the model architecture from JSON and its trained weights.
    json_file = open(model_name, 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json,
                            custom_objects={'relu6': tf.nn.relu6, 'convert2gray': convert2gray})

    model.summary()

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.load_weights(model_weight)

    if not os.path.exists(save_out_dir):
        os.makedirs(save_out_dir)

    M = block_size[0]
    N = block_size[1]
    part = 8
    filelists = getListOfFiles(target_dir)
    for filename in tqdm(filelists):
        initial_filename = os.path.splitext(filename)[0]
        in1_filename = os.path.join(target_dir, filename)
        in_clr = cv2.imread(in1_filename, 1)
        in1_image = cv2.cvtColor(in_clr, cv2.COLOR_BGR2RGB)
        # Split the image into overlapping blocks, predict each block,
        # then stitch the predictions back into a full image.
        in1_img = GetOverlappingBlocks(in1_image.copy(), M, N, part)
        prepared_data_blocks = prepare_data_blocks(in1_img, M)
        in1_img = []
        out_img1 = model.predict(tf.convert_to_tensor(prepared_data_blocks), batch_size=batch_size)
        num_img, ht, wd, ch_out = out_img1.shape
        h, w, ch = in_clr.shape
        if ch_out > 1:
            c_image = cv2.cvtColor(CombineToImage(out_img1, h, w, ch_out, part), cv2.COLOR_RGB2BGR)
            out_image_name = initial_filename + '.png'
            name_fig = os.path.join(save_out_dir, out_image_name)
            cv2.imwrite(name_fig, c_image)
        else:
            c_image = CombineToImage(out_img1, h, w, ch_out, part)
            out_image_name = initial_filename + '.png'
            name_fig = os.path.join(save_out_dir, out_image_name)
            cv2.imwrite(name_fig, c_image)


def infer_image(model_name, model_weight, target_image, out_image_name, block_size=(256, 256), batch_size=1):
    """Run the cleanup model on a single image and save the result to out_image_name."""
    json_file = open(model_name, 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json, custom_objects={'relu6': tf.nn.relu6})
    #model = model_from_json(loaded_model_json, custom_objects={'HeNormal': tf.keras.initializers.he_normal(), 'relu6': tf.nn.relu6, 'convert2gray': convert2gray, 'Functional': tf.keras.models.Model})

    model.summary()

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.load_weights(model_weight)

    M = block_size[0]
    N = block_size[1]
    part = 8
    in_clr = cv2.imread(target_image, 1)
    in1_image = cv2.cvtColor(in_clr, cv2.COLOR_BGR2RGB)
    in1_img = GetOverlappingBlocks(in1_image.copy(), M, N, part)
    prepared_data_blocks = prepare_data_blocks(in1_img, M)
    in1_img = []
    #prepared_data_blocks = NewGetOverlappingBlocks(in_clr.copy(), M, N, part)

    out_img1 = model.predict(tf.convert_to_tensor(prepared_data_blocks), batch_size=batch_size)

    num_img, ht, wd, ch_out = out_img1.shape
    h, w, ch = in_clr.shape

    if ch_out > 1:
        c_image = cv2.cvtColor(CombineToImage(out_img1, h, w, ch_out, part), cv2.COLOR_RGB2BGR)
        cv2.imwrite(out_image_name, c_image)
    else:
        c_image = CombineToImage(out_img1, h, w, ch_out, part)
        cv2.imwrite(out_image_name, c_image)
@ -0,0 +1,159 @@
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D, Lambda
import tensorflow.keras.backend as K
import os
import sys
import numpy as np


def gram_matrix(input_tensor):
    # Gram matrix of the feature maps, used for the style part of the perceptual loss.
    result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    input_shape = tf.shape(input_tensor)
    num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
    return result / num_locations

'''
# Content layer from which we will pull our feature maps
content_layers = ['block2_conv2']

# Style layers of interest
style_layers = ['block1_conv1',
                'block2_conv1',
                'block3_conv1',
                'block4_conv1',
                'block5_conv1']

num_content_layers = len(content_layers)
num_style_layers = len(style_layers)
'''

def vgg_layers(layer_names):
    """Creates a VGG model that returns a list of intermediate output values."""
    # Load a pretrained VGG19, trained on ImageNet data.
    vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
    vgg.trainable = False

    outputs = [vgg.get_layer(name).output for name in layer_names]

    model = tf.keras.Model([vgg.input], outputs)
    return model


class StyleContentModel(tf.keras.models.Model):
    """Extracts VGG19 style (Gram-matrix) and content features for the perceptual loss."""

    def __init__(self, style_layers, content_layers):
        #tf.keras.backend.clear_session()
        super(StyleContentModel, self).__init__()
        self.vgg = vgg_layers(style_layers + content_layers)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        self.num_content_layers = len(content_layers)
        self.vgg.trainable = False

    def call(self, inputs):
        "Expects float input in [0,1]"
        inputs = inputs * 255.0
        preprocessed_input = tf.keras.applications.vgg19.preprocess_input(inputs)
        outputs = self.vgg(preprocessed_input)
        style_outputs, content_outputs = (outputs[:self.num_style_layers],
                                          outputs[self.num_style_layers:])

        style_outputs = [gram_matrix(style_output)
                         for style_output in style_outputs]

        content_dict = {content_name: value
                        for content_name, value
                        in zip(self.content_layers, content_outputs)}

        style_dict = {style_name: value
                      for style_name, value
                      in zip(self.style_layers, style_outputs)}

        return {'content': content_dict, 'style': style_dict}


def Compute_PLoss(in_img, gt_img, style_weight, content_weight):
    """Perceptual loss: weighted sum of VGG19 style and content feature differences."""
    #tf.keras.backend.clear_session()
    preprocessed_in = tf.keras.applications.vgg19.preprocess_input(in_img * 255)
    preprocessed_gt = tf.keras.applications.vgg19.preprocess_input(gt_img * 255)
    # Content layer from which we will pull our feature maps
    content_layers = ['block2_conv2']

    # Style layers of interest
    style_layers = ['block1_conv1',
                    'block2_conv1',
                    'block3_conv1',
                    'block4_conv1',
                    'block5_conv1']
    extractor = StyleContentModel(style_layers, content_layers)
    in_out = extractor(preprocessed_in)
    gt_out = extractor(preprocessed_gt)
    style_outputs_in = in_out['style']
    content_outputs_in = in_out['content']
    style_outputs_gt = gt_out['style']
    content_outputs_gt = gt_out['content']
    style_loss = tf.add_n([tf.reduce_mean(abs(style_outputs_in[name] - style_outputs_gt[name])) for name in style_outputs_in.keys()])
    style_loss *= style_weight / extractor.num_style_layers
    content_loss = tf.add_n([tf.reduce_mean(abs(content_outputs_in[name] - content_outputs_gt[name])) for name in content_outputs_in.keys()])
    content_loss *= content_weight / extractor.num_content_layers
    PLoss = tf.math.add_n([style_loss, content_loss])
    return PLoss


def IlluminationLoss(y_gt, y_out, gray_flag=True, style_weight=1e-2, content_weight=1e2):
    """Total training loss: perceptual loss plus a pixel-wise term in gray or RGB space."""
    #tf.keras.backend.clear_session()

    #####################################################################
    if gray_flag:
        # Grayscale output: compute the perceptual loss on a gray-to-RGB
        # conversion and add an L1 term in gray space.
        rgb_out = tf.image.grayscale_to_rgb(y_out)
        PLoss = Compute_PLoss(rgb_out, y_gt, style_weight, content_weight)
        gray_gt = tf.image.rgb_to_grayscale(y_gt)
        gray_loss = tf.reduce_mean(abs(gray_gt - y_out))  # loss in gray space
        gray_loss = tf.math.scalar_mul(1e2, gray_loss)
        loss = tf.math.add_n([PLoss, gray_loss])
        return loss
    #####################################################################
    PLoss = Compute_PLoss(y_out, y_gt, style_weight, content_weight)

    # RGB loss
    rgb_loss = tf.reduce_mean(abs(y_gt[:, :, :, 0] - y_out[:, :, :, 0])) + tf.reduce_mean(abs(y_gt[:, :, :, 1] - y_out[:, :, :, 1])) + tf.reduce_mean(abs(y_gt[:, :, :, 2] - y_out[:, :, :, 2]))  # loss in RGB color space
    rgb_loss = tf.math.scalar_mul(1e2, rgb_loss)

    #####################################################################
    # Color loss (hue), currently unused in the final sum.
    hsv_out = tf.image.rgb_to_hsv(y_out)
    hsv_gt = tf.image.rgb_to_hsv(y_gt)
    hue_loss = tf.reduce_mean(abs(hsv_gt[:, :, :, 0] - hsv_out[:, :, :, 0]))  # loss in hue color space
    hue_loss = tf.math.scalar_mul(1e2, hue_loss)

    # Luminance loss, currently unused in the final sum.
    yuv_out = tf.image.rgb_to_yuv(y_out)
    yuv_gt = tf.image.rgb_to_yuv(y_gt)
    y_loss = tf.reduce_mean(abs(yuv_gt[:, :, :, 0] - yuv_out[:, :, :, 0]))  # loss in luminance
    y_loss = tf.math.scalar_mul(1e2, y_loss)

    #####################################################################
    loss = tf.math.add_n([PLoss, rgb_loss])
    #loss = tf.math.add_n([PLoss, rgb_loss, y_loss])
    #loss = tf.math.add_n([PLoss, rgb_loss, hue_loss])

    return loss


def illu_Loss(style_weight, content_weight, gray_flag):
    # Keras-compatible loss factory that closes over the loss hyper-parameters.
    def ILoss(y_gt, y_out):
        return IlluminationLoss(y_gt, y_out, gray_flag, style_weight, content_weight)
    return ILoss
@ -0,0 +1,224 @@
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D, Lambda

import os
import sys


def res_net_block(input_data, filters, conv_size):
    # Two conv + batch-norm layers with a residual (skip) connection.
    x = layers.Conv2D(filters, conv_size, activation=tf.nn.relu6, padding='same', kernel_initializer='he_normal')(input_data)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(filters, conv_size, activation=tf.nn.relu6, padding='same', kernel_initializer='he_normal')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Add()([x, input_data])
    x = layers.Activation(tf.nn.relu6)(x)
    return x


def convert2gray(in_tensor):
    out = tf.image.rgb_to_grayscale(in_tensor)
    return out


def CreateModel_M16_binary(input_shape=(None, None, 3), batch_size=None):
    _strides = (1, 1)
    # Define the input as a tensor with shape input_shape
    X_input = Input(shape=input_shape, batch_size=batch_size)
    gray_in = layers.Lambda(lambda x: convert2gray(x))(X_input)
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(X_input)
    out = layers.BatchNormalization()(out)
    shortcut3 = out
    out = res_net_block(out, 16, 3)
    out = res_net_block(out, 16, 3)
    out = res_net_block(out, 16, 3)
    out = res_net_block(out, 16, 3)
    out = res_net_block(out, 16, 3)
    out = layers.add([shortcut3, out])
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.Conv2D(1, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([gray_in, out])
    out = tf.math.sigmoid(out)
    # Create model
    model = Model(inputs=X_input, outputs=out, name='M16Gray')
    return model


def CreateModel_M16_color(input_shape=(None, None, 3), batch_size=None):
    _strides = (1, 1)
    # Define the input as a tensor with shape input_shape
    X_input = Input(shape=input_shape, batch_size=batch_size)
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(X_input)
    out = layers.BatchNormalization()(out)
    shortcut3 = out
    out = res_net_block(out, 16, 3)
    out = res_net_block(out, 16, 3)
    out = res_net_block(out, 16, 3)
    out = res_net_block(out, 16, 3)
    out = res_net_block(out, 16, 3)
    out = layers.add([shortcut3, out])
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.Conv2D(3, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([X_input, out])
    out = tf.math.sigmoid(out)
    # Create model
    model = Model(inputs=X_input, outputs=out, name='M16Color')
    return model


def CreateModel_M32_binary(input_shape=(None, None, 3), batch_size=None):
    _strides = (1, 1)
    # Define the input as a tensor with shape input_shape
    X_input = Input(shape=input_shape, batch_size=batch_size)
    gray_in = layers.Lambda(lambda x: convert2gray(x))(X_input)
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(X_input)
    out = layers.BatchNormalization()(out)
    shortcut3 = out
    out = layers.Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    shortcut2 = out
    out = res_net_block(out, 32, 3)
    out = res_net_block(out, 32, 3)
    out = res_net_block(out, 32, 3)
    out = res_net_block(out, 32, 3)
    out = res_net_block(out, 32, 3)
    out = layers.add([shortcut2, out])
    out = layers.Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([shortcut3, out])
    out = layers.Conv2D(1, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([gray_in, out])
    out = tf.math.sigmoid(out)
    # Create model
    model = Model(inputs=X_input, outputs=out, name='IlluNet')

    return model


def CreateModel_M32_color(input_shape=(None, None, 3), batch_size=None):
    _strides = (1, 1)
    # Define the input as a tensor with shape input_shape
    X_input = Input(shape=input_shape, batch_size=batch_size)
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(X_input)
    out = layers.BatchNormalization()(out)
    shortcut3 = out
    out = layers.Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    shortcut2 = out
    out = res_net_block(out, 32, 3)
    out = res_net_block(out, 32, 3)
    out = res_net_block(out, 32, 3)
    out = res_net_block(out, 32, 3)
    out = res_net_block(out, 32, 3)
    out = layers.add([shortcut2, out])
    out = layers.Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([shortcut3, out])
    out = layers.Conv2D(3, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([X_input, out])
    out = tf.math.sigmoid(out)
    # Create model
    model = Model(inputs=X_input, outputs=out, name='IlluNet')

    return model


def CreateModel_M64_binary(input_shape=(None, None, 3), batch_size=None):
    _strides = (1, 1)
    # Define the input as a tensor with shape input_shape
    X_input = Input(shape=input_shape, batch_size=batch_size)
    gray_in = layers.Lambda(lambda x: convert2gray(x))(X_input)
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(X_input)
    out = layers.BatchNormalization()(out)
    shortcut3 = out
    out = layers.Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    shortcut2 = out
    out = layers.Conv2D(64, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    shortcut1 = out
    out = res_net_block(out, 64, 3)
    out = res_net_block(out, 64, 3)
    out = res_net_block(out, 64, 3)
    out = res_net_block(out, 64, 3)
    out = res_net_block(out, 64, 3)
    out = layers.add([shortcut1, out])
    out = layers.Conv2D(64, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([shortcut2, out])
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([shortcut3, out])
    out = layers.Conv2D(1, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([gray_in, out])
    out = tf.math.sigmoid(out)
    # Create model
    model = Model(inputs=X_input, outputs=out, name='IlluNet')

    return model


def CreateModel_M64_color(input_shape=(None, None, 3), batch_size=None):
    _strides = (1, 1)
    # Define the input as a tensor with shape input_shape
    X_input = Input(shape=input_shape, batch_size=batch_size)
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(X_input)
    out = layers.BatchNormalization()(out)
    shortcut3 = out
    out = layers.Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    shortcut2 = out
    out = layers.Conv2D(64, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    shortcut1 = out
    out = res_net_block(out, 64, 3)
    out = res_net_block(out, 64, 3)
    out = res_net_block(out, 64, 3)
    out = res_net_block(out, 64, 3)
    out = res_net_block(out, 64, 3)
    out = layers.add([shortcut1, out])
    out = layers.Conv2D(64, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([shortcut2, out])
    out = layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([shortcut3, out])
    out = layers.Conv2D(3, kernel_size=(3, 3), activation=tf.nn.relu6, strides=_strides, padding='same', kernel_initializer='he_normal')(out)
    out = layers.BatchNormalization()(out)
    out = layers.add([X_input, out])
    out = tf.math.sigmoid(out)
    # Create model
    model = Model(inputs=X_input, outputs=out, name='IlluNet')

    return model


def GetModel(model_name='M32', gray=True, block_size=(None, None), batch_size=None):
    # Select one of the M16/M32/M64 variants, with grayscale or color output.
    input_shape = (block_size[0], block_size[1], 3)
    if model_name == 'M64':
        if gray:
            return CreateModel_M64_binary(input_shape, batch_size)
        else:
            return CreateModel_M64_color(input_shape, batch_size)
    elif model_name == 'M32':
        if gray:
            return CreateModel_M32_binary(input_shape, batch_size)
        else:
            return CreateModel_M32_color(input_shape, batch_size)
    else:
        if gray:
            return CreateModel_M16_binary(input_shape, batch_size)
        else:
            return CreateModel_M16_color(input_shape, batch_size)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/sample_input_output/Image-467.jpg (841 KiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/sample_input_output/Image-467_dnn.jpeg (740 KiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/sample_input_output/bill_dnn.jpg (691 KiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/sample_input_output/bill_org.jpg (992 KiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/sample_input_output/book_dnn.jpg (277 KiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/sample_input_output/book_org.jpg (783 KiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/sample_input_output/pres1_dnn.jpg (439 KiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/sample_input_output/pres1_org.jpg (542 KiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/sample_input_output/writing_dnn.jpg (336 KiB)
Binary data (new file): contrib/document_cleanup/light_weight_document_cleanup_ICDAR2021/sample_input_output/writing_org.jpg (736 KiB)
@ -0,0 +1,13 @@
import tensorflow as tf
import numpy as np
import cv2
import os
import sys

from train import train

# Train the cleanup model on the bundled sample data.
data_folder = 'sample_data'
gt_folder = 'sample_gt_data'
batch_size = 21

train(data_folder, gt_folder, dataset_path='dataset', checkpoint='checkpoints', train_batch_size=batch_size)
@ -0,0 +1,163 @@
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D, Lambda
from sklearn.model_selection import train_test_split
import os
import sys
import cv2
from datetime import datetime
import numpy as np

from CreateTrainingData import GenerateTrainingBlocks
from model import GetModel
from utils import load_tf_img
from loss_function import IlluminationLoss, illu_Loss

#os.environ["CUDA_VISIBLE_DEVICES"] = '0,1,2,3'
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

#gpu_devices = tf.config.experimental.list_physical_devices('GPU')
#tf.config.experimental.set_memory_growth(gpu_devices[0], True)

#mirrored_strategy = tf.distribute.MirroredStrategy(devices=["/gpu:2", "/gpu:3"])
#mirrored_strategy = tf.distribute.MirroredStrategy()

#tf.config.run_functions_eagerly(True)
tf.config.experimental_run_functions_eagerly(True)


def GetTrainFileNames(file_name_path):
    # Read the list of training block file names written by GenerateTrainingBlocks.
    train_image_names = []
    with open(file_name_path) as fp:
        for line in fp:
            filename = line.rstrip()
            name = filename
            train_image_names.append(name)

    return train_image_names


def ImageResizeSquare(image):
    # Stretch the shorter side so the image becomes square.
    if image.shape[1] != image.shape[0]:
        width = max(image.shape[1], image.shape[0])
        height = width
        dim = (width, height)
        resized = cv2.resize(image, dim, interpolation=cv2.INTER_LANCZOS4)
        return resized
    else:
        return image


def GetData(filenames, path, block_size=(256, 256)):
    # Load a few (input, ground-truth) block pairs as tensors, e.g. for sanity-checking the loss.
    max_d = max(block_size[0], block_size[1])
    gt_imgs = []
    in_imgs = []
    cnt = 0
    for name in filenames:
        gt_filename = path + '/gt' + name   # 'block_*.png' -> 'gtblock_*.png'
        in_filename = path + '/' + name
        gt_image = load_tf_img(cv2.imread(gt_filename, 1), max_d)
        in_image = load_tf_img(cv2.imread(in_filename, 1), max_d)
        in_imgs.append(in_image)
        gt_imgs.append(gt_image)
        cnt += 1
    return in_imgs, gt_imgs


class My_Custom_Generator(tf.keras.utils.Sequence):
    # Keras Sequence that streams (input, ground-truth) block batches from disk.

    def __init__(self, image_filenames, img_dir, batch_size):
        self.image_filenames = image_filenames
        self.batch_size = batch_size
        self.img_dir = img_dir

    def __len__(self):
        return (np.ceil(len(self.image_filenames) / float(self.batch_size))).astype(np.int)

    def __getitem__(self, idx):
        batch_x = self.image_filenames[idx * self.batch_size: (idx + 1) * self.batch_size]
        gt_imgs = []
        in_imgs = []
        for name in batch_x:
            gt_filename = self.img_dir + '/gt' + name
            in_filename = self.img_dir + '/' + name
            gt_image = load_tf_img(ImageResizeSquare(cv2.cvtColor(cv2.imread(gt_filename, 1), cv2.COLOR_BGR2RGB)))
            in_image = load_tf_img(ImageResizeSquare(cv2.cvtColor(cv2.imread(in_filename, 1), cv2.COLOR_BGR2RGB)))
            in_imgs.append(in_image)
            gt_imgs.append(gt_image)
        return tf.convert_to_tensor(in_imgs), tf.convert_to_tensor(gt_imgs)


def train(data_folder, gt_folder, dataset_path='dataset', checkpoint='checkpoints', epochs=10, pretrain_flag=False, pretrain_model_weight_path=None, model_name='M32', gray_flag=True, block_size=(256, 256), train_batch_size=1):
    block_height = block_size[0]
    block_width = block_size[1]
    print(block_height, block_width)
    print(data_folder)
    print(gt_folder)

    # Cut the training images into overlapping blocks and split them into train/validation sets.
    train_path, train_filenames = GenerateTrainingBlocks(data_folder=data_folder, gt_folder=gt_folder, dataset_path=dataset_path, M=block_height, N=block_width)
    train_image_names = GetTrainFileNames(train_filenames)
    X_train_filenames, X_val_filenames, y_train, y_val = train_test_split(
        train_image_names, train_image_names, test_size=0.2, random_state=1)

    my_training_batch_generator = My_Custom_Generator(X_train_filenames, train_path, train_batch_size)
    my_validation_batch_generator = My_Custom_Generator(X_val_filenames, train_path, train_batch_size)
    in_imgs, gt_imgs = GetData(train_image_names[:2], train_path)

    # Sanity-check the loss on one block pair before training.
    if gray_flag:
        print(IlluminationLoss(gt_imgs[0][tf.newaxis, :], tf.image.rgb_to_grayscale(in_imgs[0][tf.newaxis, :]), style_weight=1e-1, content_weight=1e1, gray_flag=gray_flag))
    else:
        print(IlluminationLoss(gt_imgs[0][tf.newaxis, :], in_imgs[0][tf.newaxis, :], style_weight=1e-1, content_weight=1e1, gray_flag=gray_flag))

    #logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    logdir = os.path.join('logs', 'scalars')
    logdir = os.path.join(logdir, datetime.now().strftime("%Y%m%d-%H%M%S"))
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

    custom_loss = illu_Loss(style_weight=1e-1, content_weight=1e1, gray_flag=gray_flag)
    model = GetModel(model_name=model_name, gray=gray_flag, block_size=block_size)
    opt = tf.keras.optimizers.Adam()
    model.compile(optimizer=opt, loss=custom_loss)

    # Initialize model with pre-trained weights
    if pretrain_flag:
        model.load_weights(pretrain_model_weight_path)

    # Save the model architecture (JSON) to the checkpoint folder.
    Illumodel_json = model.to_json()
    model_name_suffix = ''
    model_weight_suffix = ''
    if gray_flag:
        model_name_suffix = '_gray.json'
        model_weight_suffix = '_gray'
    else:
        model_name_suffix = '_color.json'
        model_weight_suffix = '_color'
    save_model_name = model_name + model_name_suffix
    save_model_path = os.path.join(checkpoint, save_model_name)
    print(save_model_path)
    with open(save_model_path, "w") as json_file:
        json_file.write(Illumodel_json)

    # Checkpoint: keep the best weights (lowest validation loss) seen so far.
    model_weight_name = model_name + model_weight_suffix + '_' + data_folder + '_epoch-{epoch:02d}.hdf5'
    full_model_weight_path = os.path.join(checkpoint, model_weight_name)
    model_checkpoint = tf.keras.callbacks.ModelCheckpoint(full_model_weight_path, monitor='val_loss', verbose=1, save_best_only=True)

    callbacks_list = [tensorboard_callback, model_checkpoint]

    training_history = model.fit(my_training_batch_generator,
                                 epochs=epochs, verbose=1, workers=21, use_multiprocessing=False,
                                 validation_data=my_validation_batch_generator,
                                 callbacks=callbacks_list)
@ -0,0 +1,116 @@
import numpy as np
import sys
import os
import tensorflow as tf
import cv2


def ImageResize(image, factor=0.6):
    # Resize an image by a scale factor using Lanczos interpolation.
    width = int(image.shape[1] * factor)
    height = int(image.shape[0] * factor)
    dim = (width, height)
    resized = cv2.resize(image, dim, interpolation=cv2.INTER_LANCZOS4)
    return resized


def getListOfFiles(dirName):
    # Return the list of file names in the given directory (non-recursive).
    print(dirName)
    listOfFile = os.listdir(dirName)
    allFiles = list()
    # Iterate over all the entries
    for entry in listOfFile:
        allFiles.append(entry)
    return allFiles


def GetOverlappingBlocks(im, M=256, N=256, Part=8):
    # Split an image into M x N tiles that overlap by M/Part (resp. N/Part) pixels.
    tiles = []
    tile = np.zeros((M, N, 3), dtype=np.uint8)
    #tile[:, :, 2] = 255

    x = 0
    y = 0
    x_start = 0
    y_start = 0
    while y < im.shape[0]:
        while x < im.shape[1]:
            if x != 0:
                x_start = x - int(N / Part)
            if y != 0:
                y_start = y - int(M / Part)
            # Copy the (possibly clipped) region into a fixed-size tile.
            if y_start + M > im.shape[0]:
                if x_start + N > im.shape[1]:
                    tile[0:im.shape[0] - y_start, 0:im.shape[1] - x_start, :] = im[y_start:im.shape[0], x_start:im.shape[1], :]
                else:
                    tile[0:im.shape[0] - y_start, 0:N, :] = im[y_start:im.shape[0], x_start:x_start + N, :]
            else:
                if x_start + N > im.shape[1]:
                    tile[0:M, 0:im.shape[1] - x_start, :] = im[y_start:y_start + M, x_start:im.shape[1], :]
                else:
                    tile[0:M, 0:N, :] = im[y_start:y_start + M, x_start:x_start + N, :]

            #pre_tile = cv2.cvtColor(PreProcessInput(cv2.cvtColor(tile, cv2.COLOR_RGB2BGR)), cv2.COLOR_BGR2RGB)
            #tiles.append(load_tf_img(pre_tile, M))
            #tiles.append(load_tf_img(tile, M))
            tiles.append(tile)

            tile = np.zeros((M, N, 3), dtype=np.uint8)
            #tile[:, :, 2] = 255
            x = x_start + N
        y = y_start + M
        x = 0
        x_start = 0
    return tiles


def CombineToImage(imgs, h, w, ch, Part=8):
    # Stitch overlapping prediction blocks back into an h x w image,
    # averaging pixels where blocks overlap.
    Image = np.zeros((h, w, ch), dtype=np.float32)
    Image_flag = np.zeros((h, w), dtype=bool)
    i = 0
    j = 0
    i_end = 0
    j_end = 0
    for k in range(len(imgs)):
        #part_img = np.copy(imgs[k, :, :, :])
        part_img = np.copy(imgs[k])
        hh, ww, cc = part_img.shape
        i_end = min(h, i + hh)
        j_end = min(w, j + ww)

        for m in range(hh):
            for n in range(ww):
                if i + m < h:
                    if j + n < w:
                        if Image_flag[i + m, j + n]:
                            Image[i + m, j + n, :] = (Image[i + m, j + n, :] + part_img[m, n, :]) / 2
                        else:
                            Image[i + m, j + n, :] = np.copy(part_img[m, n, :])

        Image_flag[i:i_end, j:j_end] = True
        j = min(w - 1, j + ww - int(ww / Part))
        if j_end == w:
            j = 0
            i = min(h - 1, i + hh - int(hh / Part))
    Image = Image * 255.0
    return Image.astype(np.uint8)


def load_tf_img(img, max_dim=256):
    # Convert an image to a float32 tensor in [0,1] and resize so the longer side equals max_dim.
    img = tf.convert_to_tensor(img)
    img = tf.image.convert_image_dtype(img, tf.float32)
    shape = tf.cast(tf.shape(img)[:-1], tf.float32)
    long_dim = max(shape)
    scale = max_dim / long_dim
    new_shape = tf.cast(shape * scale, tf.int32)
    img = tf.image.resize(img, new_shape)
    return img