-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor DeclutterService to a separate package.
Add WavenetService package. Remove old dependencies. Rename application due to new domain. Add reading level and grade level calculation.
- Loading branch information
Showing
9 changed files
with
179 additions
and
189 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8 changes: 4 additions & 4 deletions
8
src/main/java/io/pdfit/PdfitApplication.java → ...n/java/io/klutter/KlutterApplication.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
79 changes: 79 additions & 0 deletions
79
src/main/java/io/klutter/declutterservice/DeclutterService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
package io.klutter.declutterservice; | ||
|
||
import io.github.bonigarcia.wdm.WebDriverManager; | ||
import io.whelk.flesch.kincaid.ReadabilityCalculator; | ||
import net.dankito.readability4j.Readability4J; | ||
import okhttp3.HttpUrl; | ||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.safety.Safelist; | ||
import org.openqa.selenium.WebDriver; | ||
import org.openqa.selenium.chrome.ChromeDriver; | ||
import org.openqa.selenium.chrome.ChromeOptions; | ||
import org.springframework.web.bind.annotation.RequestMapping; | ||
import org.springframework.web.bind.annotation.RestController; | ||
|
||
import java.io.IOException; | ||
|
||
@RestController | ||
@RequestMapping("/api/v1") | ||
public class DeclutterService { | ||
|
||
@RequestMapping("/declutter") | ||
public String index() throws IOException { | ||
|
||
String url = "https://realpython.com/python-sockets/"; | ||
|
||
// Selenium. Using Selenium because jsoup doesn't handle JS and lazy loading. | ||
// System.setProperty("webdriver.chrome.driver", "/home/dave/chromedriver"); | ||
ChromeOptions options = new ChromeOptions();options.addArguments("--headless"); | ||
//WebDriver driver = new ChromeDriver(options); | ||
|
||
// Using Webdriver | ||
WebDriverManager.chromedriver().setup(); | ||
WebDriver driver = new ChromeDriver(options); | ||
|
||
// ToDo: Receive URL from frontend. | ||
driver.get(url); | ||
|
||
// Get the raw HTML source. | ||
String html = driver.getPageSource(); | ||
|
||
// Parse with Jsoup, so we can work with it.; | ||
Document doc = Jsoup.parse(html); | ||
|
||
// ToDo: Do a bit of sanitization on the HTML before passing to the PDF service. | ||
String safe = Jsoup.clean(doc.html(), Safelist.basic()); | ||
|
||
// Process with the readability4j mozilla readability.js wrapper. | ||
Readability4J readability4J = new Readability4J(url, doc); | ||
net.dankito.readability4j.Article article = readability4J.parse(); | ||
|
||
// returns extracted content in a <div> element | ||
String extractedContentHtml = article.getContent(); | ||
// to get content wrapped in <html> tags and encoding set to UTF-8, see chapter 'Output encoding' | ||
String extractedContentHtmlWithUtf8Encoding = article.getContentWithUtf8Encoding(); | ||
String extractedContentPlainText = article.getTextContent(); | ||
String title = article.getTitle(); | ||
String byline = article.getByline(); | ||
String excerpt = article.getExcerpt(); | ||
|
||
|
||
// Get the reading ease score. | ||
double ease = ReadabilityCalculator.calculateReadingEase(extractedContentPlainText); | ||
|
||
// Get the grade level score. | ||
double grade = ReadabilityCalculator.calculateGradeLevel(extractedContentPlainText); | ||
|
||
// Check it's working | ||
System.out.println(ease + " " + grade); | ||
|
||
// ToDo: Add user, title, excerpt, byline, content, ease, grade and tag array to model. | ||
|
||
// Return the clean HTML | ||
return extractedContentHtml; | ||
} | ||
|
||
|
||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package io.klutter.pdfservice; | ||
import com.github.jhonnymertz.wkhtmltopdf.wrapper.Pdf; | ||
import com.github.jhonnymertz.wkhtmltopdf.wrapper.configurations.WrapperConfig; | ||
import com.github.jhonnymertz.wkhtmltopdf.wrapper.params.Param; | ||
import io.github.bonigarcia.wdm.WebDriverManager; | ||
import net.dankito.readability4j.Readability4J; | ||
import okhttp3.HttpUrl; | ||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.safety.Safelist; | ||
import org.openqa.selenium.WebDriver; | ||
import org.openqa.selenium.chrome.ChromeDriver; | ||
|
||
import org.openqa.selenium.chrome.ChromeOptions; | ||
import org.openqa.selenium.firefox.FirefoxDriver; | ||
import org.springframework.web.bind.annotation.RequestMapping; | ||
import org.springframework.web.bind.annotation.RestController; | ||
|
||
import java.io.*; | ||
|
||
@RestController | ||
@RequestMapping("/api/v1") | ||
public class PdfService { | ||
|
||
|
||
|
||
@RequestMapping("/pdf") | ||
|
||
public String index(){ | ||
|
||
String url = "https://realpython.com/python-sockets/"; | ||
Document doc = null; | ||
try { | ||
doc = Jsoup.connect("https://realpython.com/python-sockets/").get(); | ||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
} | ||
|
||
Pdf pdf = new Pdf(); | ||
|
||
// pdf.addPageFromString(doc.toString()); | ||
pdf.addPageFromUrl("https://realpython.com/python-sockets/"); | ||
|
||
// Add a Table of Contents | ||
pdf.addToc(); | ||
pdf.addParam(new Param("--disable-javascript")); | ||
|
||
// Add styling for Table of Contents | ||
// pdf.addTocParam(new Param("--xsl-style-sheet", "my_toc.xsl")); | ||
|
||
// Save the PDF | ||
try { | ||
pdf.saveAs("output.pdf"); | ||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
} catch (InterruptedException e) { | ||
e.printStackTrace(); | ||
} | ||
|
||
return "Hello from another mapping"; | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
package io.klutter.wavenetservice; | ||
|
||
/**
 * Placeholder for the Wavenet service; it currently declares no members.
 */
public class WavenetService {
}
34 changes: 0 additions & 34 deletions
34
src/main/java/io/pdfit/declutterservice/DeclutterService.java
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.