Skip to content

Commit b7d6d95

Browse files
committed
Use a worker thread for PDFBox page rendering to keep the app responsive
1 parent 3f5491e commit b7d6d95

6 files changed

+106
-65
lines changed

BasicCAT/BasicCAT.b4j

+24-24
Original file line numberDiff line numberDiff line change
@@ -207,32 +207,32 @@ Module38=Map2Xml
207207
Module39=SRX
208208
Module4=CustomListView
209209
Module40=POIWord
210-
Module41=pdfbox
211-
Module42=git
212-
Module43=dictWebView
213-
Module44=FontPicker
214-
Module45=TermEditor
215-
Module46=TermManager
216-
Module47=HistoryViewer
217-
Module48=ClientKVS
218-
Module49=statistics
210+
Module41=git
211+
Module42=dictWebView
212+
Module43=FontPicker
213+
Module44=TermEditor
214+
Module45=TermManager
215+
Module46=HistoryViewer
216+
Module47=ClientKVS
217+
Module48=statistics
218+
Module49=LanguagePairSelector
219219
Module5=txtFilter
220-
Module50=LanguagePairSelector
221-
Module51=idmlUtils
222-
Module52=editDistance
223-
Module53=InputBox
224-
Module54=MTParamsFiller
225-
Module55=filterGenericUtils
226-
Module56=TBX
227-
Module57=CallSubUtils
228-
Module58=viewSegment
229-
Module59=serverLauncher
220+
Module50=idmlUtils
221+
Module51=editDistance
222+
Module52=InputBox
223+
Module53=MTParamsFiller
224+
Module54=filterGenericUtils
225+
Module55=TBX
226+
Module56=CallSubUtils
227+
Module57=viewSegment
228+
Module58=serverLauncher
229+
Module59=languageChooser
230230
Module6=TM
231-
Module60=languageChooser
232-
Module61=TMEditor
233-
Module62=TextFlow
234-
Module63=opennlp
235-
Module64=coordinate
231+
Module60=TMEditor
232+
Module61=TextFlow
233+
Module62=opennlp
234+
Module63=coordinate
235+
Module64=pdfbox
236236
Module7=Term
237237
Module8=KeyValueStore
238238
Module9=ProjectSettings

BasicCAT/BasicCAT.b4j.meta

+1-1
Original file line numberDiff line numberDiff line change
@@ -194,4 +194,4 @@ ModuleClosedNodes7=
194194
ModuleClosedNodes8=
195195
ModuleClosedNodes9=
196196
SelectedBuild=0
197-
VisibleModules=1,2,3,4,5,6,7,8,9,10,42,15,12,21
197+
VisibleModules=1,2,3,4,5,6,7,8,9,10,41,15,12,21,64,36,59

BasicCAT/PDF2TXT.bas

+21-19
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,16 @@ Sub StripButton_MouseClicked (EventData As MouseEvent)
3131
fx.Msgbox(frm,"Please choose a pdf file first.","")
3232
Return
3333
End If
34+
Dim pdfbox1 As pdfbox
35+
pdfbox1.Initialize(label1.Text)
3436
If IncludePageNumCheckBox.Checked Then
3537
If offsetTextField.Text="" Or PageAffixTextField.Text="" Then
3638
fx.Msgbox(frm,"Please fill affix and offset first.","")
3739
Return
3840
End If
39-
TextArea1.text=pdfbox.stripPDFText(label1.Text,True,FacingPageCheckBox.Checked,PageAffixTextField.Text,offsetTextField.Text)
41+
TextArea1.text=pdfbox1.stripPDFText(True,FacingPageCheckBox.Checked,PageAffixTextField.Text,offsetTextField.Text)
4042
Else
41-
TextArea1.text=pdfbox.stripPDFText(label1.Text,False,False,"",0)
43+
TextArea1.text=pdfbox1.stripPDFText(False,False,"",0)
4244
End If
4345
End Sub
4446

@@ -117,26 +119,23 @@ Sub ocrButton_MouseClicked (EventData As MouseEvent)
117119

118120
Dim lc As languageChooser
119121
lc.Initialize
120-
Dim langs As List
121-
langs=lc.ShowAndWait
122-
Dim langsParam As String
123-
For Each chkBox As CheckBox In langs
124-
If chkBox.Checked Then
125-
langsParam=langsParam&chkBox.Text&"+"
126-
End If
127-
Next
128-
If langsParam.EndsWith("+") Then
129-
langsParam=langsParam.SubString2(0,langsParam.Length-1)
130-
End If
131-
Log(langsParam)
122+
Dim langsParam As String=lc.ShowAndWait
132123
If langsParam="" Then
133124
Return
134125
End If
135126

136127
Label2.Text="Converting pdf to images..."
137128
Dim files As List
138-
wait for (pdfbox.getImage(File.GetFileParent(label1.Text),File.GetName(label1.Text))) complete (result As List)
139-
files=result
129+
files.Initialize
130+
Dim dir As String=File.GetFileParent(label1.Text)
131+
Dim filename As String=File.GetName(label1.Text)
132+
Dim pdfbox1 As pdfbox
133+
pdfbox1.Initialize(File.Combine(dir,filename))
134+
wait for (pdfbox1.getImageAsync()) complete (result As Object)
135+
For i=0 To pdfbox1.PageNum-1
136+
files.Add(File.Combine(dir,i&".jpg"))
137+
Next
138+
140139
Label2.Text="OCRing..."
141140
If IncludePageNumCheckBox.Checked Then
142141
wait for (ocrWithPagenum(files,langsParam,PageAffixTextField.Text,offsetTextField.Text)) complete (text As String)
@@ -200,7 +199,8 @@ Sub ocrWithPagenum(files As List,langsParam As String,affix As String,offset As
200199
Else
201200
path="tesseract"
202201
End If
203-
Dim content As String
202+
Dim contentSB As StringBuilder
203+
contentSB.Initialize
204204
Dim pdfnum As Int=0
205205
For i=0 To files.Size-1
206206
pdfnum=pdfnum+1
@@ -228,15 +228,17 @@ Sub ocrWithPagenum(files As List,langsParam As String,affix As String,offset As
228228
If Success And ExitCode = 0 Then
229229
Log("Success")
230230
Log(StdOut)
231-
content=content&pageStart&CRLF&CRLF&removeLines(File.ReadString(dir,i&".txt"))
231+
contentSB.Append(pageStart).Append(CRLF)
232+
contentSB.Append(CRLF).Append(File.ReadString(dir,i&".txt"))
233+
'removeLines
232234
Else
233235
Log("Error: " & StdErr)
234236
End If
235237

236238

237239
Next
238240

239-
Return content
241+
Return contentSB.ToString
240242
End Sub
241243

242244
Sub ocr(files As List,langsParam As String) As ResumableSub

BasicCAT/Project.bas

+3-2
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ Public Sub commitAndPush(commitMessage As String)
515515
End If
516516
End If
517517
Else
518-
wait for (updateLocalFileBasedonFetch(username,password,email)) Complete (success as Object)
518+
wait for (updateLocalFileBasedonFetch(username,password,email)) Complete (success As Object)
519519
Dim diffList As List
520520
diffList=projectGit.diffList
521521
Log(diffList)
@@ -575,7 +575,7 @@ Sub samelocalHeadAndRemoteHead(username As String,password As String,fetch As Bo
575575
Return result
576576
End Sub
577577

578-
Sub updateLocalFileBasedonFetch(username As String,password As String,email As String) as ResumableSub
578+
Sub updateLocalFileBasedonFetch(username As String,password As String,email As String) As ResumableSub
579579
wait for (samelocalHeadAndRemoteHead(username,password,True)) Complete (isSame As Boolean)
580580
If isSame = False Then
581581
Dim localHead,remoteHead As String
@@ -637,6 +637,7 @@ Sub updateLocalFileBasedonFetch(username As String,password As String,email As S
637637
End If
638638

639639
Log("worddir,after: "&projectGit.getWorkdirPath)
640+
return True
640641
End Sub
641642

642643
Sub updateWorkFile(filename As String) As Boolean

BasicCAT/languageChooser.bas

+17-2
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,26 @@ Public Sub Initialize
2424
Next
2525
End Sub
2626

27-
Public Sub ShowAndWait As List
27+
Public Sub ShowAndWait As String
2828
frm.ShowAndWait
29-
Return ListView1.Items
29+
Dim langsParam As String
30+
For Each chkBox As CheckBox In ListView1.Items
31+
If chkBox.Checked Then
32+
langsParam=langsParam&chkBox.Text&"+"
33+
End If
34+
Next
35+
If langsParam.EndsWith("+") Then
36+
langsParam=langsParam.SubString2(0,langsParam.Length-1)
37+
End If
38+
Log(langsParam)
39+
Return langsParam
3040
End Sub
3141

3242
Sub OKButton_MouseClicked (EventData As MouseEvent)
3343
frm.Close
44+
End Sub
45+
46+
'Handles the form's CloseRequest event.
'Empties ListView1 before closing, so that ShowAndWait, which builds its
'result from the checked items in ListView1, finds nothing to collect and
'returns an empty string.
Sub frm_CloseRequest (EventData As Event)
	'Drop every item first so no selection is reported.
	ListView1.Items.Clear
	'Then close the form.
	frm.Close
End Sub

BasicCAT/pdfbox.bas

+40-17
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,27 @@
11
B4J=true
22
Group=Default Group
33
ModulesStructureVersion=1
4-
Type=StaticCode
5-
Version=6.51
4+
Type=Class
5+
Version=7.32
66
@EndOfDesignText@
7-
'Static code module
8-
Sub Process_Globals
7+
Sub Class_Globals
98
Private fx As JFX
9+
Private th As Thread
10+
Private doc As JavaObject
11+
Private path As String
1012
End Sub
1113

12-
Sub stripPDFText(filepath As String, includePageNum As Boolean,isFacingPage As Boolean,affix As String,offset As Int) As String
14+
'Initializes the object. You can add parameters to this method if needed.
15+
Public Sub Initialize(filePath As String)
16+
th.Initialise("th")
17+
path=filePath
1318
Dim PDDocument As JavaObject
1419
PDDocument.InitializeStatic("org.apache.pdfbox.pdmodel.PDDocument")
15-
Dim doc As JavaObject
16-
doc=PDDocument.RunMethodJO("load",Array(getFile(filepath)))
20+
doc=PDDocument.RunMethodJO("load",Array(getFile(filePath)))
21+
End Sub
22+
23+
Public Sub stripPDFText(includePageNum As Boolean,isFacingPage As Boolean,affix As String,offset As Int) As String
24+
1725
Dim PDFTextStripper As JavaObject
1826
PDFTextStripper.InitializeNewInstance("org.apache.pdfbox.text.PDFTextStripper",Null)
1927
Dim pageNum As Int
@@ -45,27 +53,43 @@ Sub stripPDFText(filepath As String, includePageNum As Boolean,isFacingPage As B
4553
Return text
4654
End Sub
4755

48-
Sub getImage(dir As String,filename As String) As ResumableSub
56+
57+
'Returns the number of pages of the PDF document held in doc
'(doc is loaded in Initialize).
'Callers in this project read it as the PageNum property.
Public Sub getPageNum As Int
	'NOTE(review): this colour-management property is also set in getImage;
	'it is kept here so the process-wide side effect is unchanged, but it
	'does not look necessary just to count pages - confirm and remove if so.
	SetSystemProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")
	Dim pageNum As Int
	pageNum=doc.RunMethod("getNumberOfPages",Null)
	Return pageNum
End Sub
65+
66+
'Runs getImage on the thread object th and resumes once th raises its
'Ended event, so the caller can "wait for" completion instead of running
'the page rendering itself.
'Returns (as the ResumableSub result): the endedOK flag reported by th_Ended.
Public Sub getImageAsync As ResumableSub
	'getImage declares a single String parameter that it does not use,
	'hence the placeholder argument.
	th.Start(Me,"getImage",Array As Object("placeholder"))
	wait for th_Ended(endedOK As Boolean, error As String)
	Log(endedOK)
	If endedOK=False Then
		'Report why the thread failed instead of discarding the message.
		Log("getImage failed: "&error)
	End If
	Return endedOK
End Sub
72+
73+
Public Sub getImage(param As String)
4974
Dim files As List
5075
files.Initialize
5176
SetSystemProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")
5277
Dim PDDocument As JavaObject
5378
PDDocument.InitializeStatic("org.apache.pdfbox.pdmodel.PDDocument")
54-
Dim doc As JavaObject
55-
doc=PDDocument.RunMethodJO("load",Array(getFile(File.Combine(dir,filename))))
5679
Dim pageNum As Int
5780
pageNum=doc.RunMethod("getNumberOfPages",Null)
5881
Dim PDFRenderer As JavaObject
5982
PDFRenderer.InitializeNewInstance("org.apache.pdfbox.rendering.PDFRenderer",Array(doc))
6083
For i=0 To pageNum-1
6184
Log(i)
62-
Sleep(0)
63-
renderImageToFile(PDFRenderer,files,dir,i)
85+
'Sleep(0)
86+
'files.Add(File.Combine(dir,i&".jpg"))
87+
renderImageToFile(PDFRenderer,File.GetFileParent(path),i)
6488
Next
65-
Return files
89+
'Return files
6690
End Sub
6791

68-
Sub renderImageToFile(PDFRenderer As JavaObject,files As List,dir As String,i As Int)
92+
Sub renderImageToFile(PDFRenderer As JavaObject,dir As String,i As Int)
6993
Dim bi As JavaObject
7094
Dim dpi As Float
7195
dpi=150
@@ -76,11 +100,10 @@ Sub renderImageToFile(PDFRenderer As JavaObject,files As List,dir As String,i As
76100
imageIO.InitializeStatic("javax.imageio.ImageIO")
77101
imageIO.RunMethod("write",Array(bi,"jpg",out))
78102
out.Close
79-
files.Add(File.Combine(dir,i&".jpg"))
80103
End Sub
81104

82-
Sub getFile(path As String) As JavaObject
105+
Sub getFile(filepath As String) As JavaObject
83106
Dim fileJO As JavaObject
84-
fileJO.InitializeNewInstance("java.io.File",Array(path))
107+
fileJO.InitializeNewInstance("java.io.File",Array(filepath))
85108
Return fileJO
86109
End Sub

0 commit comments

Comments
 (0)