Skip to content

Commit

Permalink
Accept partial object (URL only) via Thymeleaf on home page.
Browse files Browse the repository at this point in the history
Create DeclutterService
  • Loading branch information
davedavis committed Mar 12, 2022
1 parent 9e74a24 commit 8e4ec8b
Show file tree
Hide file tree
Showing 13 changed files with 702 additions and 205 deletions.
13 changes: 8 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -142,14 +142,17 @@
<version>1.9</version>
</dependency>

<!-- Swagger dependency for API documentation -->
<dependency>
<groupId>io.springfox</groupId>
<artifactId>springfox-boot-starter</artifactId>
<version>3.0.0</version>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-ui</artifactId>
<version>1.6.6</version>
</dependency>


<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20210307</version>
</dependency>



Expand Down
22 changes: 0 additions & 22 deletions src/main/java/io/klutter/SpringFoxConfig.java

This file was deleted.

134 changes: 134 additions & 0 deletions src/main/java/io/klutter/controllers/BakWebController.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import io.github.bonigarcia.wdm.WebDriverManager;
import io.klutter.data.KdocRepository;
import io.klutter.models.Kdoc;
import io.klutter.services.KdocService;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import io.whelk.flesch.kincaid.ReadabilityCalculator;
import net.dankito.readability4j.Readability4J;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Safelist;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.util.MultiValueMap;
import org.springframework.web.bind.annotation.*;

import java.nio.charset.StandardCharsets;
import java.util.List;


import com.azure.core.exception.ResourceNotFoundException;
import com.azure.core.http.HttpResponse;
import com.fasterxml.jackson.core.JsonProcessingException;

import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import io.github.bonigarcia.wdm.WebDriverManager;
import io.klutter.data.KdocRepository;
import io.klutter.models.Kdoc;


import io.whelk.flesch.kincaid.ReadabilityCalculator;
import net.dankito.readability4j.Readability4J;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Safelist;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;

import java.util.List;
import java.util.Optional;

/**
 * Backup/experimental controller that fetches a hard-coded article URL with
 * headless Chrome, extracts the readable content with Readability4J and
 * prints Flesch-Kincaid readability scores for it.
 */
@Controller
public class BakWebController {

    // Set up the repository reference.
    @Autowired
    KdocRepository kdocRepository;


    // @RequestMapping(value = "/", method = RequestMethod.POST)
    // ResponseEntity<Kdoc> generateKdocFromUrl(@RequestBody Kdoc submittedKdoc) {
    // System.out.println(submittedKdoc.toString());
    // // Decode the URL encoded string if it's encoded.
    // String decoded = java.net.URLDecoder.decode(submittedKdoc.getUrl(), StandardCharsets.UTF_8);
    // System.out.println(">>>>>>>>>>>>>>>>>>>>>>>> Standard " + submittedKdoc );
    // System.out.println(">>>>>>>>>>>>>>>>>>>>>>>> Decoded " + decoded );
    //
    //
    // Kdoc savedKdoc = kdocRepository.save(submittedKdoc);
    // // ToDo: Change to savedkDoc
    // return new ResponseEntity<Kdoc>(submittedKdoc, HttpStatus.OK);
    // }

    /**
     * Loads the hard-coded article URL in headless Chrome, runs the rendered
     * page through Readability4J and returns the extracted content.
     *
     * <p>The reading-ease and grade-level scores of the extracted plain text
     * are printed to standard output.
     *
     * @return the extracted article content as HTML (wrapped in a
     *         {@code <div>} element), written directly to the response body
     */
    @RequestMapping(value = "/declutter2", method = RequestMethod.GET)
    @ResponseBody // Without this, Spring MVC would treat the returned HTML as a view name.
    public String index() {
        String url = "https://www.androidauthority.com/best-android-13-features-3113880/";

        // Selenium. Using Selenium because jsoup doesn't handle JS and lazy loading.
        System.setProperty("webdriver.chrome.driver", "/home/dave/chromedriver");
        ChromeOptions options = new ChromeOptions();
        options.addArguments("--headless");

        // Using Webdriver
        WebDriverManager.chromedriver().setup();
        WebDriver driver = new ChromeDriver(options);

        String html;
        try {
            // ToDo: Receive URL from frontend.
            driver.get(url);

            // Get the raw HTML source.
            html = driver.getPageSource();
        } finally {
            // Always shut the browser down; otherwise every request leaves a
            // headless Chrome and chromedriver process running.
            driver.quit();
        }

        // Parse with Jsoup, so we can work with it.
        Document doc = Jsoup.parse(html);

        // ToDo: Do a bit of sanitization on the HTML before passing to the PDF service.
        // (e.g. Jsoup.clean(doc.html(), Safelist.basic()))

        // Process with the readability4j mozilla readability.js wrapper.
        Readability4J readability4J = new Readability4J(url, doc);
        net.dankito.readability4j.Article article = readability4J.parse();

        // returns extracted content in a <div> element
        // (to get content wrapped in <html> tags and encoding set to UTF-8,
        // use article.getContentWithUtf8Encoding(); see chapter 'Output encoding')
        String extractedContentHtml = article.getContent();
        String extractedContentPlainText = article.getTextContent();

        // Get the reading ease score.
        double ease = ReadabilityCalculator.calculateReadingEase(extractedContentPlainText);

        // Get the grade level score.
        double grade = ReadabilityCalculator.calculateGradeLevel(extractedContentPlainText);

        // Check it's working
        System.out.println(ease + " " + grade);

        // Testing the sizing of the document for Azure
        // System.out.println(">>>>>>>>>>>>>>>>>>>>>> " + extractedContentPlainText.length());
        // System.out.println(">>>>>>>>>>>>>>>>>>>>>> " + WordUtils.wrap(extractedContentPlainText, 40));

        // ToDo: Add user, title, excerpt, byline, content, ease, grade and tag array to model.
        // (title, byline and excerpt are available via article.getTitle(),
        // article.getByline() and article.getExcerpt())

        // Return the clean HTML
        return extractedContentHtml;
    }
}
27 changes: 0 additions & 27 deletions src/main/java/io/klutter/controllers/KdocRestController.java

This file was deleted.

36 changes: 36 additions & 0 deletions src/main/java/io/klutter/controllers/WebController.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package io.klutter.controllers;

import io.klutter.models.Kdoc;
import io.klutter.services.KdocService;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;


/**
 * Web (page-rendering) controller mapped to the site root.
 */
@Controller
@RequestMapping("/")
public class WebController {

    /** View name returned for the homepage. */
    private static final String HOME_VIEW = "index";

    /** Model key under which the form-backing Kdoc is exposed. */
    private static final String KDOC_ATTRIBUTE = "kdoc";

    /**
     * Homepage handler.
     *
     * <p>The {@code Kdoc} argument is bound via {@code @ModelAttribute} so the
     * Thymeleaf homepage form can let the user submit a URL as a Kdoc object.
     *
     * @param kdoc  the Kdoc instance bound for this request
     * @param model the model the Kdoc is placed into under the key "kdoc"
     * @return the view name "index"
     */
    @GetMapping
    public String index(@ModelAttribute Kdoc kdoc, Model model) {
        model.addAttribute(KDOC_ATTRIBUTE, kdoc);
        return HOME_VIEW;
    }

    // private final KdocService kdocService;
    //
    // public WebController(KdocService kdocService) {
    // this.kdocService = kdocService;
    // }
    //
    // @GetMapping
    // public List<Kdoc> listAllKdocs(){
    // return kdocService.getAllKdocs();
    // }
}
2 changes: 2 additions & 0 deletions src/main/java/io/klutter/data/KdocRepository.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,7 @@ public interface KdocRepository extends CrudRepository<Kdoc, Long> {
@NotNull
List<Kdoc> findAll();

/**
 * Existence check keyed on a Kdoc's URL.
 *
 * <p>NOTE(review): no implementation is visible here; presumably Spring Data
 * derives the query from the method name and reports whether a stored Kdoc
 * has this url. Confirm against the Kdoc entity mapping.
 *
 * @param url the URL to look for
 * @return a boxed Boolean; presumably true when a matching Kdoc exists (confirm)
 */
Boolean existsKdocByUrl (String url);


}
5 changes: 5 additions & 0 deletions src/main/java/io/klutter/models/Kdoc.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,15 @@ public Kdoc(String url, String title, String excerpt, String byline, String cont
this.pdf = pdf;
}


/**
 * Returns the current value of the {@code id} field.
 *
 * @return the id of this Kdoc
 */
public Long getId() {
    return this.id;
}

/**
 * Replaces the value of the {@code id} field.
 *
 * @param id the new id for this Kdoc
 */
public void setId(Long id) {
this.id = id;
}

/**
 * Returns the current value of the {@code title} field.
 *
 * @return the title of this Kdoc
 */
public String getTitle() {
    return this.title;
}
Expand Down
Loading

0 comments on commit 8e4ec8b

Please sign in to comment.