From aec6996f6f2a222ad6e6b271b55d9c6dbcc9cf8f Mon Sep 17 00:00:00 2001 From: Dave Davis Date: Mon, 21 Feb 2022 16:08:41 +0000 Subject: [PATCH] Refactor to MVC Add H2 DB for testing Remove future services for clarity Enable H2 console Swap Kdoc crudrepository implementation for a JpaRepository implementation --- .gitignore | 1 - Readme.md | 5 + src/main/java/io/klutter/DocumentService.java | 82 ---------------- .../java/io/klutter/KlutterApplication.java | 6 +- .../klutter/controllers/KdocController.java | 27 ++++++ .../controllers/KdocRestController.java | 27 ++++++ .../java/io/klutter/dao/KdocRepository.java | 12 --- .../java/io/klutter/data/KdocRepository.java | 13 +++ .../io/klutter/{entity => models}/Kdoc.java | 5 +- .../java/io/klutter/services/KdocService.java | 94 +++++++++++++++++++ .../{pdfservice => services}/PdfService.java | 11 +-- .../textanalytics/TextAnalyticsService.java | 8 -- .../wavenetservice/WavenetService.java | 4 - src/main/resources/application.properties | 2 + 14 files changed, 175 insertions(+), 122 deletions(-) delete mode 100644 src/main/java/io/klutter/DocumentService.java create mode 100644 src/main/java/io/klutter/controllers/KdocController.java create mode 100644 src/main/java/io/klutter/controllers/KdocRestController.java delete mode 100644 src/main/java/io/klutter/dao/KdocRepository.java create mode 100644 src/main/java/io/klutter/data/KdocRepository.java rename src/main/java/io/klutter/{entity => models}/Kdoc.java (96%) create mode 100644 src/main/java/io/klutter/services/KdocService.java rename src/main/java/io/klutter/{pdfservice => services}/PdfService.java (75%) delete mode 100644 src/main/java/io/klutter/textanalytics/TextAnalyticsService.java delete mode 100644 src/main/java/io/klutter/wavenetservice/WavenetService.java diff --git a/.gitignore b/.gitignore index c66f74c..3dc595d 100644 --- a/.gitignore +++ b/.gitignore @@ -33,7 +33,6 @@ build/ .vscode/ /bak/ /.mvn/ -/.mvn/ /mvnw /mvnw.cmd /secrets/ diff --git a/Readme.md b/Readme.md index e5b203a..a924032 100644 --- a/Readme.md +++ b/Readme.md @@ -16,6 +16,11 @@ https://davedavis.atlassian.net/jira/software/c/projects/PDFIT/boards/4/backlog +## In Mem testing DB +- http://localhost:8080/h2-console/ +- jdbc:h2:mem:testdatabase +- No user or pass. + ## Sprint 0 requirements - Introduction to Application (Overall), - List of User Stories (Overall), diff --git a/src/main/java/io/klutter/DocumentService.java b/src/main/java/io/klutter/DocumentService.java deleted file mode 100644 index e5ab8ad..0000000 --- a/src/main/java/io/klutter/DocumentService.java +++ /dev/null @@ -1,82 +0,0 @@ -package io.klutter; - -import io.github.bonigarcia.wdm.WebDriverManager; -import io.whelk.flesch.kincaid.ReadabilityCalculator; -import net.dankito.readability4j.Readability4J; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.safety.Safelist; -import org.openqa.selenium.WebDriver; -import org.openqa.selenium.chrome.ChromeDriver; -import org.openqa.selenium.chrome.ChromeOptions; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RestController; - -import java.io.IOException; - -@RestController -@RequestMapping("/api/v1") -public class DocumentService { - - @RequestMapping("/documents") - public String index() throws IOException { - - String url = "https://realpython.com/python-sockets/"; - - // Selenium. Using Selenium because jsoup doesn't handle JS and lazy loading. - // System.setProperty("webdriver.chrome.driver", "/home/dave/chromedriver"); - ChromeOptions options = new ChromeOptions();options.addArguments("--headless"); - //WebDriver driver = new ChromeDriver(options); - - // Using Webdriver - WebDriverManager.chromedriver().setup(); - WebDriver driver = new ChromeDriver(options); - - // ToDo: Receive URL from frontend. - driver.get(url); - - // Get the raw HTML source. - String html = driver.getPageSource(); - - // Parse with Jsoup, so we can work with it.; - Document doc = Jsoup.parse(html); - - // ToDo: Do a bit of sanitization on the HTML before passing to the PDF service. - String safe = Jsoup.clean(doc.html(), Safelist.basic()); - - // Process with the readability4j mozilla readability.js wrapper. - Readability4J readability4J = new Readability4J(url, doc); - net.dankito.readability4j.Article article = readability4J.parse(); - - // returns extracted content in a
element - String extractedContentHtml = article.getContent(); - // to get content wrapped in tags and encoding set to UTF-8, see chapter 'Output encoding' - String extractedContentHtmlWithUtf8Encoding = article.getContentWithUtf8Encoding(); - String extractedContentPlainText = article.getTextContent(); - String title = article.getTitle(); - String byline = article.getByline(); - String excerpt = article.getExcerpt(); - - - // Get the reading ease score. - double ease = ReadabilityCalculator.calculateReadingEase(extractedContentPlainText); - - // Get the grade level score. - double grade = ReadabilityCalculator.calculateGradeLevel(extractedContentPlainText); - - // Check it's working - System.out.println(ease + " " + grade); - - // Testing the sizing of the document for Azure -// System.out.println(">>>>>>>>>>>>>>>>>>>>>> " + extractedContentPlainText.length()); -// System.out.println(">>>>>>>>>>>>>>>>>>>>>> " + WordUtils.wrap(extractedContentPlainText, 40)); - - // ToDo: Add user, title, excerpt, byline, content, ease, grade and tag array to model. - - // Return the clean HTML - return extractedContentHtml; - } - - -} - diff --git a/src/main/java/io/klutter/KlutterApplication.java b/src/main/java/io/klutter/KlutterApplication.java index 5a4f478..6e974b0 100644 --- a/src/main/java/io/klutter/KlutterApplication.java +++ b/src/main/java/io/klutter/KlutterApplication.java @@ -1,7 +1,7 @@ package io.klutter; -import io.klutter.dao.KdocRepository; -import io.klutter.entity.Kdoc; +import io.klutter.data.KdocRepository; +import io.klutter.models.Kdoc; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.context.event.ApplicationReadyEvent; @@ -9,7 +9,7 @@ //Add the scanBasePackages parameter to the annotation as I added my services in //separate packages so, they need to be configured on application start. -@SpringBootApplication(scanBasePackages = {"io.klutter.declutterservice", "io.klutter.pdfservice", "io.klutter.dao"} ) +@SpringBootApplication(scanBasePackages = {"io.klutter.models", "io.klutter.controllers", "io.klutter.data", "io.klutter.services"} ) public class KlutterApplication { private final KdocRepository kdocRepository; diff --git a/src/main/java/io/klutter/controllers/KdocController.java b/src/main/java/io/klutter/controllers/KdocController.java new file mode 100644 index 0000000..9d8db6c --- /dev/null +++ b/src/main/java/io/klutter/controllers/KdocController.java @@ -0,0 +1,27 @@ +package io.klutter.controllers; + + +import io.klutter.models.Kdoc; +import io.klutter.services.KdocService; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.util.List; + +@RestController +@RequestMapping("/kdocs") +public class KdocController { + + private final KdocService kdocService; + + + public KdocController(KdocService kdocService) { + this.kdocService = kdocService; + } + + @GetMapping + public List getAllKdocs(){ + return kdocService.getAllKdocs(); + } +} diff --git a/src/main/java/io/klutter/controllers/KdocRestController.java b/src/main/java/io/klutter/controllers/KdocRestController.java new file mode 100644 index 0000000..d9e05fb --- /dev/null +++ b/src/main/java/io/klutter/controllers/KdocRestController.java @@ -0,0 +1,27 @@ +package io.klutter.controllers; + + +import io.klutter.models.Kdoc; +import io.klutter.services.KdocService; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.util.List; + +@RestController +@RequestMapping("/api/v1/kdocs") +public class KdocRestController { + + private final KdocService kdocService; + + + public KdocRestController(KdocService kdocService) { + this.kdocService = kdocService; + } + + @GetMapping + public List getAllKdocs(){ + return kdocService.getAllKdocs(); + } +} diff --git a/src/main/java/io/klutter/dao/KdocRepository.java b/src/main/java/io/klutter/dao/KdocRepository.java deleted file mode 100644 index ddbac95..0000000 --- a/src/main/java/io/klutter/dao/KdocRepository.java +++ /dev/null @@ -1,12 +0,0 @@ -package io.klutter.dao; - -import io.klutter.entity.Kdoc; -import org.springframework.data.repository.CrudRepository; -import org.springframework.stereotype.Repository; - -@Repository -public interface KdocRepository extends CrudRepository { - - // Additional non-CRUD related methods. - -} \ No newline at end of file diff --git a/src/main/java/io/klutter/data/KdocRepository.java b/src/main/java/io/klutter/data/KdocRepository.java new file mode 100644 index 0000000..f6748ee --- /dev/null +++ b/src/main/java/io/klutter/data/KdocRepository.java @@ -0,0 +1,13 @@ +package io.klutter.data; + +import io.klutter.models.Kdoc; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.repository.CrudRepository; +import org.springframework.stereotype.Repository; + +@Repository +public interface KdocRepository extends JpaRepository { + + // Additional non-CRUD related methods. + +} \ No newline at end of file diff --git a/src/main/java/io/klutter/entity/Kdoc.java b/src/main/java/io/klutter/models/Kdoc.java similarity index 96% rename from src/main/java/io/klutter/entity/Kdoc.java rename to src/main/java/io/klutter/models/Kdoc.java index cc08177..6d74d7e 100644 --- a/src/main/java/io/klutter/entity/Kdoc.java +++ b/src/main/java/io/klutter/models/Kdoc.java @@ -1,11 +1,12 @@ -package io.klutter.entity; +package io.klutter.models; import javax.persistence.*; import java.io.Serializable; @Entity @Table(name = "Kdoc") -public class Kdoc implements Serializable { +//public class Kdoc implements Serializable { +public class Kdoc { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) diff --git a/src/main/java/io/klutter/services/KdocService.java b/src/main/java/io/klutter/services/KdocService.java new file mode 100644 index 0000000..0d484cd --- /dev/null +++ b/src/main/java/io/klutter/services/KdocService.java @@ -0,0 +1,94 @@ +package io.klutter.services; + +import io.github.bonigarcia.wdm.WebDriverManager; +import io.klutter.data.KdocRepository; +import io.klutter.models.Kdoc; +import io.whelk.flesch.kincaid.ReadabilityCalculator; +import net.dankito.readability4j.Readability4J; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.safety.Safelist; +import org.openqa.selenium.WebDriver; +import org.openqa.selenium.chrome.ChromeDriver; +import org.openqa.selenium.chrome.ChromeOptions; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.util.List; + +@RestController +@RequestMapping("/api/v1") +public class KdocService { + + private final KdocRepository kdocRepository; + + public KdocService(KdocRepository kdocRepository) { + this.kdocRepository = kdocRepository; + } + + public List getAllKdocs(){ + return kdocRepository.findAll(); + } + +// @RequestMapping("/documents") +// public String index() { +// +// String url = "https://realpython.com/python-sockets/"; +// +// // Selenium. Using Selenium because jsoup doesn't handle JS and lazy loading. +// // System.setProperty("webdriver.chrome.driver", "/home/dave/chromedriver"); +// ChromeOptions options = new ChromeOptions();options.addArguments("--headless"); +// //WebDriver driver = new ChromeDriver(options); +// +// // Using Webdriver +// WebDriverManager.chromedriver().setup(); +// WebDriver driver = new ChromeDriver(options); +// +// // ToDo: Receive URL from frontend. +// driver.get(url); +// +// // Get the raw HTML source. +// String html = driver.getPageSource(); +// +// // Parse with Jsoup, so we can work with it.; +// Document doc = Jsoup.parse(html); +// +// // ToDo: Do a bit of sanitization on the HTML before passing to the PDF service. +// String safe = Jsoup.clean(doc.html(), Safelist.basic()); +// +// // Process with the readability4j mozilla readability.js wrapper. +// Readability4J readability4J = new Readability4J(url, doc); +// net.dankito.readability4j.Article article = readability4J.parse(); +// +// // returns extracted content in a
element +// String extractedContentHtml = article.getContent(); +// // to get content wrapped in tags and encoding set to UTF-8, see chapter 'Output encoding' +// String extractedContentHtmlWithUtf8Encoding = article.getContentWithUtf8Encoding(); +// String extractedContentPlainText = article.getTextContent(); +// String title = article.getTitle(); +// String byline = article.getByline(); +// String excerpt = article.getExcerpt(); +// +// +// // Get the reading ease score. +// double ease = ReadabilityCalculator.calculateReadingEase(extractedContentPlainText); +// +// // Get the grade level score. +// double grade = ReadabilityCalculator.calculateGradeLevel(extractedContentPlainText); +// +// // Check it's working +// System.out.println(ease + " " + grade); +// +// // Testing the sizing of the document for Azure +//// System.out.println(">>>>>>>>>>>>>>>>>>>>>> " + extractedContentPlainText.length()); +//// System.out.println(">>>>>>>>>>>>>>>>>>>>>> " + WordUtils.wrap(extractedContentPlainText, 40)); +// +// // ToDo: Add user, title, excerpt, byline, content, ease, grade and tag array to model. +// +// // Return the clean HTML +// return extractedContentHtml; +// } + + +} + diff --git a/src/main/java/io/klutter/pdfservice/PdfService.java b/src/main/java/io/klutter/services/PdfService.java similarity index 75% rename from src/main/java/io/klutter/pdfservice/PdfService.java rename to src/main/java/io/klutter/services/PdfService.java index bcb715e..e5572bf 100644 --- a/src/main/java/io/klutter/pdfservice/PdfService.java +++ b/src/main/java/io/klutter/services/PdfService.java @@ -1,18 +1,9 @@ -package io.klutter.pdfservice; +package io.klutter.services; import com.github.jhonnymertz.wkhtmltopdf.wrapper.Pdf; -import com.github.jhonnymertz.wkhtmltopdf.wrapper.configurations.WrapperConfig; import com.github.jhonnymertz.wkhtmltopdf.wrapper.params.Param; -import io.github.bonigarcia.wdm.WebDriverManager; -import net.dankito.readability4j.Readability4J; -import okhttp3.HttpUrl; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; -import org.jsoup.safety.Safelist; -import org.openqa.selenium.WebDriver; -import org.openqa.selenium.chrome.ChromeDriver; -import org.openqa.selenium.chrome.ChromeOptions; -import org.openqa.selenium.firefox.FirefoxDriver; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; diff --git a/src/main/java/io/klutter/textanalytics/TextAnalyticsService.java b/src/main/java/io/klutter/textanalytics/TextAnalyticsService.java deleted file mode 100644 index 258a1e5..0000000 --- a/src/main/java/io/klutter/textanalytics/TextAnalyticsService.java +++ /dev/null @@ -1,8 +0,0 @@ -package io.klutter.textanalytics; - - -// Encapsulation of 3rd party API from Azure. This API tends to change over time (typical MS) -// this way, I can isolate the impacteded code if something changes. - -public class TextAnalyticsService { -} diff --git a/src/main/java/io/klutter/wavenetservice/WavenetService.java b/src/main/java/io/klutter/wavenetservice/WavenetService.java deleted file mode 100644 index 06d7d45..0000000 --- a/src/main/java/io/klutter/wavenetservice/WavenetService.java +++ /dev/null @@ -1,4 +0,0 @@ -package io.klutter.wavenetservice; - -public class WavenetService { -} diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index d9eaa4c..b2e763b 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -8,6 +8,8 @@ spring.mvc.pathmatch.matching-strategy=ant_path_matcher # ToDo: Remove for production # For logging H2 during dev. logging.level.org.springframework.jdbc.datasource.init.ScriptUtils=debug +spring.h2.console.enabled=true +spring.datasource.url=jdbc:h2:mem:testdatabase # Tell JPA that I'll handle the schema, just connect to the DB. spring.jpa.hibernate.ddl-auto=none