document_scanner.py
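# Scans a photographed document: detect the paper outline with Canny edges and
# contour approximation, warp it to a top-down view with a four point
# perspective transform, then binarize grayscale documents or enhance the
# lightness of colored ones to give a paper-scan effect.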
import cv2
import imutils
import argparse
import numpy as np
from pathlib import Path
from skimage.filters import threshold_local


def get_paper_contour(contours):
    '''Finds the paper contour in the list of contours'''
    for ctr in contours:
        # Approximate the contour
        perimeter = cv2.arcLength(ctr, True)
        approximated_contour = cv2.approxPolyDP(ctr, 0.02 * perimeter, True)
        # If the approximated contour has 4 points, we assume we have found the paper
        if len(approximated_contour) == 4:
            return approximated_contour
    return None


def order_points(points):
    '''Orders the points of the contour as top-left, top-right, bottom-left, bottom-right'''
    ordered_points = np.zeros((4, 2), dtype='float32')  # top-left, top-right, bottom-left, bottom-right
    coord_sum = points.sum(axis=1)
    ordered_points[0] = points[np.argmin(coord_sum)]   # top-left point has the smallest sum of x and y
    ordered_points[3] = points[np.argmax(coord_sum)]   # bottom-right point has the largest sum of x and y
    coord_diff = np.diff(points, axis=1)
    ordered_points[1] = points[np.argmin(coord_diff)]  # top-right point has the smallest difference (y - x)
    ordered_points[2] = points[np.argmax(coord_diff)]  # bottom-left point has the largest difference (y - x)
    return ordered_points
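# Illustration of the ordering trick above (hypothetical corner coordinates):
# for points [[50, 40], [400, 60], [30, 500], [420, 480]], the x + y sums are
# [90, 460, 530, 900], so the smallest sum picks the top-left corner and the
# largest the bottom-right; np.diff(points, axis=1) gives y - x, here
# [-10, -340, 470, 60], so the smallest difference picks the top-right corner
# and the largest the bottom-left.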


def calculate_document_ratio(contour):
    '''Calculates the aspect ratio of the document'''
    top_width = np.linalg.norm(contour[1] - contour[0])
    bottom_width = np.linalg.norm(contour[3] - contour[2])
    left_height = np.linalg.norm(contour[2] - contour[0])
    right_height = np.linalg.norm(contour[3] - contour[1])
    width = (top_width + bottom_width) / 2
    height = (left_height + right_height) / 2
    return width / height
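# Worked example (made-up corner points): if the averaged widths come out to
# 620 px and the averaged heights to 800 px, the function returns 0.775, so
# the 800-pixel-high output built below becomes int(800 * 0.775) = 620 pixels wide.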


def detect_color(image, threshold=50):
    '''Determines if the document is colored or grayscale'''
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    saturation = hsv[:, :, 1]
    mean_saturation = np.mean(saturation)
    return mean_saturation > threshold


def enhance_lightness(image, binary_mask, alpha=1.25, beta=-20):
    '''Enhances the lightness channel of the LAB image'''
    lab_image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)  # LAB = Lightness, A (green to red), B (blue to yellow)
    L, A, B = cv2.split(lab_image)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))  # Contrast Limited Adaptive Histogram Equalization
    L = clahe.apply(L)
    L = cv2.bitwise_and(L, L, mask=binary_mask)  # Keep only the lightness channel where the paper is
    L = cv2.convertScaleAbs(L, alpha=alpha, beta=beta)  # Enhance the lightness channel
    scan = cv2.merge([L, A, B])
    scan = cv2.cvtColor(scan, cv2.COLOR_LAB2BGR)
    return scan


def diagnostic_output():
    '''Displays the intermediate images and the final scan for debugging'''
    ### Diagnostic Output 1 ###
    cv2.imshow('Image', image)
    cv2.imshow('Edges', edges)
    cv2.waitKey(0)
    ### Diagnostic Output 2 ###
    cv2.imshow('Paper Contour', image_contour)
    cv2.waitKey(0)
    ### Diagnostic Output 3 ###
    cv2.imshow('Original Image', original)
    cv2.imshow('Scan', scan)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--image', required=True, help='Path to the image to be scanned')
    args = parser.parse_args()

    image_path = Path(args.image)
    if not image_path.exists():
        print(f'Error: {image_path} not found')
        exit(1)

    image = cv2.imread(str(image_path))  # cv2.imread expects a string path
    scale_ratio = image.shape[0] / 500.0  # Keep track of the ratio of the original image to the resized image
    original = image.copy()
    image = imutils.resize(image, height=500)

    # Convert the image to grayscale, blur it, and find edges
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(gray, 75, 200)
    ### Diagnostic Output 1 ###

    # Find all the contours (geometric shapes surrounded by edges) in the edge-detected image
    contours = cv2.findContours(edges.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]  # Keep the top 5 contours by area

    # Find the paper contour
    paper_contour = get_paper_contour(contours)
    if paper_contour is None:
        print('Error: Paper not found')
        exit(1)

    # Show the paper contour
    image_contour = image.copy()
    cv2.drawContours(image_contour, [paper_contour], -1, (0, 255, 0), 2)
    ### Diagnostic Output 2 ###

    # Apply a four point perspective transform to get a top-down view of the paper
    paper_contour = paper_contour.reshape(4, 2).astype(np.float32) * scale_ratio
    paper_contour = order_points(paper_contour)
    aspect_ratio = calculate_document_ratio(paper_contour)  # Get the aspect ratio of the document

    # We want the document to be 800 pixels high
    H = 800
    W = int(H * aspect_ratio)
    destination_points = np.array([[0, 0], [W, 0], [0, H], [W, H]], dtype='float32')
    perspective_transform = cv2.getPerspectiveTransform(paper_contour, destination_points)
    warped_image = cv2.warpPerspective(original, perspective_transform, (W, H))

    # Give the warped image a paper scan effect
    warped_image_grayscale = cv2.cvtColor(warped_image, cv2.COLOR_BGR2GRAY)
    T = threshold_local(warped_image_grayscale, 11, offset=10, method='gaussian')
    binary_mask = (warped_image_grayscale > T).astype('uint8') * 255
    is_colored = detect_color(warped_image)  # Dynamically determine if the document is colored or grayscale
    if not is_colored:
        scan = binary_mask
    else:
        scan = enhance_lightness(warped_image, binary_mask)

    # Interpolate the scanned image to increase the resolution
    scan = cv2.resize(scan, (int(original.shape[0] * aspect_ratio), original.shape[0]), interpolation=cv2.INTER_CUBIC)
    ### Diagnostic Output 3 ###
    diagnostic_output()
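# Example invocation (assuming a photo of a document saved as page.jpg in the
# working directory):
#   python document_scanner.py --image page.jpg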