From ce94d88a175a88b5664a9ae5fac99f574bb543f4 Mon Sep 17 00:00:00 2001 From: Dave Davis Date: Mon, 21 Feb 2022 00:45:36 +0000 Subject: [PATCH] Add Dockerfile for application. Add Application Event listeners for debugging. Add H2 DB for Sprint 0 Add first Kdoc Entity Add sample schema and data for testing --- .gitignore | 1 + Dockerfile | 14 ++ Readme.md | 6 +- pom.xml | 32 ++++- ...utterService.java => DocumentService.java} | 11 +- .../java/io/klutter/KlutterApplication.java | 21 ++- src/main/java/io/klutter/SpringFoxConfig.java | 22 +++ .../java/io/klutter/dao/KdocRepository.java | 12 ++ src/main/java/io/klutter/entity/Kdoc.java | 134 ++++++++++++++++++ .../textanalytics/TextAnalyticsService.java | 8 ++ src/main/resources/application.properties | 13 ++ src/main/resources/application.yml | 2 + src/main/resources/bootstrap.yml | 3 + src/main/resources/static/index.html | 86 +++++++++++ 14 files changed, 357 insertions(+), 8 deletions(-) create mode 100755 Dockerfile rename src/main/java/io/klutter/{declutterservice/DeclutterService.java => DocumentService.java} (89%) create mode 100644 src/main/java/io/klutter/SpringFoxConfig.java create mode 100644 src/main/java/io/klutter/dao/KdocRepository.java create mode 100644 src/main/java/io/klutter/entity/Kdoc.java create mode 100644 src/main/java/io/klutter/textanalytics/TextAnalyticsService.java create mode 100644 src/main/resources/application.yml create mode 100644 src/main/resources/bootstrap.yml create mode 100644 src/main/resources/static/index.html diff --git a/.gitignore b/.gitignore index c1a2498..c66f74c 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ build/ /.mvn/ /mvnw /mvnw.cmd +/secrets/ diff --git a/Dockerfile b/Dockerfile new file mode 100755 index 0000000..d87ff77 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM maven:3.8.4-openjdk-11-slim as BUILDER +ARG VERSION=0.0.1-SNAPSHOT +WORKDIR /build/ +COPY pom.xml /build/ +COPY src /build/src/ + +RUN mvn clean package +COPY 
target/Klutter-${VERSION}.jar target/application.jar + +FROM openjdk:11.0.8-jre-slim +WORKDIR /app/ + +COPY --from=BUILDER /build/target/application.jar /app/ +CMD java -jar /app/application.jar \ No newline at end of file diff --git a/Readme.md b/Readme.md index 67562ea..e5b203a 100644 --- a/Readme.md +++ b/Readme.md @@ -33,7 +33,11 @@ https://davedavis.atlassian.net/jira/software/c/projects/PDFIT/boards/4/backlog - +## ToDo +Add user, title, excerpt, byline, content, ease, grade and tag array to model. + ## Ideas -- WaveNet service \ No newline at end of file +- WaveNet service +- Text analytics and sentiment analysis service \ No newline at end of file diff --git a/pom.xml b/pom.xml index 9c625ba..0fbc5be 100644 --- a/pom.xml +++ b/pom.xml @@ -18,11 +18,21 @@ 11 + + + + org.springframework.boot spring-boot-starter-data-jpa + + + org.springframework.data + spring-data-commons + + org.springframework.boot spring-boot-starter-thymeleaf @@ -35,7 +45,6 @@ com.h2database h2 - runtime org.mariadb.jdbc @@ -117,7 +126,28 @@ 5.1.0 + + + com.azure + azure-ai-textanalytics + 5.1.5 + + + + + + + org.apache.commons + commons-text + 1.9 + + + + io.springfox + springfox-boot-starter + 3.0.0 + diff --git a/src/main/java/io/klutter/declutterservice/DeclutterService.java b/src/main/java/io/klutter/DocumentService.java similarity index 89% rename from src/main/java/io/klutter/declutterservice/DeclutterService.java rename to src/main/java/io/klutter/DocumentService.java index e12d391..e5ab8ad 100644 --- a/src/main/java/io/klutter/declutterservice/DeclutterService.java +++ b/src/main/java/io/klutter/DocumentService.java @@ -1,9 +1,8 @@ -package io.klutter.declutterservice; +package io.klutter; import io.github.bonigarcia.wdm.WebDriverManager; import io.whelk.flesch.kincaid.ReadabilityCalculator; import net.dankito.readability4j.Readability4J; -import okhttp3.HttpUrl; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.safety.Safelist; @@ -17,9 +16,9 @@ 
@RestController @RequestMapping("/api/v1") -public class DeclutterService { +public class DocumentService { - @RequestMapping("/declutter") + @RequestMapping("/documents") public String index() throws IOException { String url = "https://realpython.com/python-sockets/"; @@ -68,6 +67,10 @@ public String index() throws IOException { // Check it's working System.out.println(ease + " " + grade); + // Testing the sizing of the document for Azure +// System.out.println(">>>>>>>>>>>>>>>>>>>>>> " + extractedContentPlainText.length()); +// System.out.println(">>>>>>>>>>>>>>>>>>>>>> " + WordUtils.wrap(extractedContentPlainText, 40)); + // ToDo: Add user, title, excerpt, byline, content, ease, grade and tag array to model. // Return the clean HTML diff --git a/src/main/java/io/klutter/KlutterApplication.java b/src/main/java/io/klutter/KlutterApplication.java index 2d3d0ee..5a4f478 100644 --- a/src/main/java/io/klutter/KlutterApplication.java +++ b/src/main/java/io/klutter/KlutterApplication.java @@ -1,16 +1,33 @@ package io.klutter; +import io.klutter.dao.KdocRepository; +import io.klutter.entity.Kdoc; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.context.event.ApplicationReadyEvent; +import org.springframework.context.event.EventListener; //Add the scanBasePackages parameter to the annotation as I added my services in //separate packages so, they need to be configured on application start. 
-@SpringBootApplication(scanBasePackages = {"io.klutter.declutterservice", "io.klutter.pdfservice"} ) +@SpringBootApplication(scanBasePackages = {"io.klutter.declutterservice", "io.klutter.pdfservice", "io.klutter.dao"} ) public class KlutterApplication { + private final KdocRepository kdocRepository; + + public KlutterApplication(KdocRepository kdocRepository) { + this.kdocRepository = kdocRepository; + } + public static void main(String[] args) { SpringApplication.run(KlutterApplication.class, args); - System.out.println("Application Running: http://localhost:8080"); + } + + // So I can visualize data access working. + // So I can wait for Spring to set up the DB before I start playing around with DB. + @EventListener(ApplicationReadyEvent.class) + public void EventListenerExecute() { + Iterable kdocs = this.kdocRepository.findAll(); + kdocs.forEach(System.out::println); } } diff --git a/src/main/java/io/klutter/SpringFoxConfig.java b/src/main/java/io/klutter/SpringFoxConfig.java new file mode 100644 index 0000000..6f7df3b --- /dev/null +++ b/src/main/java/io/klutter/SpringFoxConfig.java @@ -0,0 +1,22 @@ +package io.klutter; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import springfox.documentation.builders.PathSelectors; +import springfox.documentation.builders.RequestHandlerSelectors; +import springfox.documentation.spi.DocumentationType; +import springfox.documentation.spring.web.plugins.Docket; + +@Configuration +public class SpringFoxConfig { + + @Bean + public Docket api() { + return new Docket(DocumentationType.SWAGGER_2) + .select() + .apis(RequestHandlerSelectors.any()) + .paths(PathSelectors.any()) + .build(); + } + +} diff --git a/src/main/java/io/klutter/dao/KdocRepository.java b/src/main/java/io/klutter/dao/KdocRepository.java new file mode 100644 index 0000000..ddbac95 --- /dev/null +++ b/src/main/java/io/klutter/dao/KdocRepository.java @@ -0,0 +1,12 @@ +package 
io.klutter.dao; + +import io.klutter.entity.Kdoc; +import org.springframework.data.repository.CrudRepository; +import org.springframework.stereotype.Repository; + +@Repository +public interface KdocRepository extends CrudRepository { + + // Additional non-CRUD related methods. + +} \ No newline at end of file diff --git a/src/main/java/io/klutter/entity/Kdoc.java b/src/main/java/io/klutter/entity/Kdoc.java new file mode 100644 index 0000000..cc08177 --- /dev/null +++ b/src/main/java/io/klutter/entity/Kdoc.java @@ -0,0 +1,134 @@ +package io.klutter.entity; + +import javax.persistence.*; +import java.io.Serializable; + +@Entity +@Table(name = "Kdoc") +public class Kdoc implements Serializable { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "id") + private Long id; + + @Column(name = "title") + private String title; + + @Column(name = "url") + private String url; + + @Column(name = "excerpt") + private String excerpt; + + @Column(name = "byline") + private String byline; + + @Column(name = "content") + private String content; + + @Column(name = "ease") + private Float ease; + + @Column(name = "grade") + private Float grade; + + @Column(name = "pdf") + @Lob + private String pdf; + + + public Kdoc() { + } + + public Kdoc(String url, String title, String excerpt, String byline, String content, Float ease, Float grade, String pdf) { + this.title = title; + this.url = url; + this.excerpt = excerpt; + this.byline = byline; + this.content = content; + this.ease = ease; + this.grade = grade; + this.pdf = pdf; + } + + public Long getId() { + return id; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getUrl() { return url;} + + public void setUrl(String url) { this.url = url; } + + public String getExcerpt() { + return excerpt; + } + + public void setExcerpt(String excerpt) { + this.excerpt = excerpt; + } + + public String getByline() { + return byline; + 
} + + public void setByline(String byline) { + this.byline = byline; + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + this.content = content; + } + + public Float getEase() { + return ease; + } + + public void setEase(Float ease) { + this.ease = ease; + } + + public Float getGrade() { + return grade; + } + + public void setGrade(Float grade) { + this.grade = grade; + } + + public String getPdf() { + return pdf; + } + + public void setPdf(String pdf) { + this.pdf = pdf; + } + + + @Override + public String toString() { + return "Kdoc{" + + "id=" + id + + ", title='" + title + '\'' + + ", url='" + url + '\'' + + ", excerpt='" + excerpt + '\'' + + ", byline='" + byline + '\'' + + ", content='" + content + '\'' + + ", ease=" + ease + + ", grade=" + grade + + ", pdf='" + pdf + '\'' + + '}'; + } +} \ No newline at end of file diff --git a/src/main/java/io/klutter/textanalytics/TextAnalyticsService.java b/src/main/java/io/klutter/textanalytics/TextAnalyticsService.java new file mode 100644 index 0000000..258a1e5 --- /dev/null +++ b/src/main/java/io/klutter/textanalytics/TextAnalyticsService.java @@ -0,0 +1,8 @@ +package io.klutter.textanalytics; + + +// Encapsulation of 3rd party API from Azure. This API tends to change over time (typical MS) +// this way, I can isolate the impacted code if something changes. + +public class TextAnalyticsService { +} diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 8b13789..d9eaa4c 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1 +1,14 @@ +logging.level.org.hibernate.tool.hbm2ddl=DEBUG +logging.level.org.hibernate.SQL=DEBUG + +# So Swagger doesn't give out. +spring.mvc.pathmatch.matching-strategy=ant_path_matcher + + +# ToDo: Remove for production +# For logging H2 during dev. 
+logging.level.org.springframework.jdbc.datasource.init.ScriptUtils=debug + +# Tell JPA that I'll handle the schema, just connect to the DB. +spring.jpa.hibernate.ddl-auto=none diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml new file mode 100644 index 0000000..47fbb02 --- /dev/null +++ b/src/main/resources/application.yml @@ -0,0 +1,2 @@ +server: + port: 8080 \ No newline at end of file diff --git a/src/main/resources/bootstrap.yml b/src/main/resources/bootstrap.yml new file mode 100644 index 0000000..4a7c9e5 --- /dev/null +++ b/src/main/resources/bootstrap.yml @@ -0,0 +1,3 @@ +spring: + application: + name: Klutter \ No newline at end of file diff --git a/src/main/resources/static/index.html b/src/main/resources/static/index.html new file mode 100644 index 0000000..2590192 --- /dev/null +++ b/src/main/resources/static/index.html @@ -0,0 +1,86 @@ + + + + + + + + + Klutter.io - Document Management for E-Ink Fans + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+

Welcome to Klutter

+

Submit your URL below to get a decluttered, ad-free version optimized for e-ink devices like + reMarkable or Kindle.

+
+
+ + +
You can save your document in your account or download immediately without an account.
+
+ +
+
+
+ + + + + + + +