Skip to content
This repository was archived by the owner on Oct 30, 2018. It is now read-only.

Commit 6d5396f

Browse files
committed
#8 use jsoup over default java xml libs
1 parent 76b0940 commit 6d5396f

File tree

6 files changed

+74
-122
lines changed

6 files changed

+74
-122
lines changed

pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424
<artifactId>commons-lang3</artifactId>
2525
<version>3.4</version>
2626
</dependency>
27+
<dependency>
28+
<groupId>org.jsoup</groupId>
29+
<artifactId>jsoup</artifactId>
30+
<version>1.8.2</version>
31+
</dependency>
32+
2733

2834
<dependency>
2935
<groupId>log4j</groupId>

src/main/java/com/alchemyapi/api/AlchemyApi.java

Lines changed: 40 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -14,27 +14,22 @@
1414
import com.alchemyapi.api.parameters.TargetedSentimentParameters;
1515
import com.alchemyapi.api.parameters.TaxonomyParameters;
1616
import com.alchemyapi.api.parameters.TextParameters;
17+
import org.apache.commons.io.IOUtils;
1718
import org.apache.log4j.Logger;
18-
import org.w3c.dom.Document;
19-
import org.w3c.dom.NodeList;
19+
import org.jsoup.Jsoup;
20+
import org.jsoup.nodes.Document;
21+
import org.jsoup.nodes.Element;
22+
import org.jsoup.parser.Parser;
2023
import org.xml.sax.SAXException;
2124

22-
import javax.xml.parsers.DocumentBuilderFactory;
2325
import javax.xml.parsers.ParserConfigurationException;
24-
import javax.xml.xpath.XPath;
25-
import javax.xml.xpath.XPathConstants;
26-
import javax.xml.xpath.XPathExpression;
2726
import javax.xml.xpath.XPathExpressionException;
28-
import javax.xml.xpath.XPathFactory;
29-
import java.io.DataInputStream;
3027
import java.io.DataOutputStream;
3128
import java.io.IOException;
3229
import java.net.HttpURLConnection;
3330
import java.net.URL;
3431
import java.nio.charset.Charset;
3532

36-
import static org.apache.commons.lang3.StringUtils.isBlank;
37-
import static org.apache.commons.lang3.StringUtils.isNotBlank;
3833
import static org.apache.commons.lang3.StringUtils.length;
3934
import static org.apache.commons.lang3.StringUtils.trimToEmpty;
4035

@@ -510,32 +505,32 @@ public Document imageGetRankedImageKeywords(final ImageParameters params) {
510505
outputStream.write(image);
511506
outputStream.close();
512507

513-
return doRequest(handle, params.getOutputMode());
508+
return doRequest(handle, params);
514509

515510
} catch(IOException e) {
516511
throw new AlchemyApiException(e);
517512
}
518513
}
519514

520-
private Document get(final String callName, final String callPrefix, final Parameters params) {
515+
private Document get(final String callName, final String callPrefix, final Parameters parameters) {
521516
try {
522-
final String urlQuery = "?apikey=" + configuration.getApiKey() + params.getUrlQuery();
517+
final String urlQuery = "?apikey=" + configuration.getApiKey() + parameters.getUrlQuery();
523518
final URL url = new URL(buildBaseApiUrl() + callPrefix + "/" + callName + urlQuery);
524519

525520
final HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();
526521
httpURLConnection.setDoOutput(true);
527522

528-
return doRequest(httpURLConnection, params.getOutputMode());
523+
return doRequest(httpURLConnection, parameters);
529524

530525
} catch(IOException e) {
531526
throw new AlchemyApiException(e);
532527
}
533528
}
534529

535-
private Document post(final String callName, final String callPrefix, final Parameters params) {
530+
private Document post(final String callName, final String callPrefix, final Parameters parameters) {
536531
try {
537532
final URL url = new URL(buildBaseApiUrl() + callPrefix + "/" + callName);
538-
final String data = "apikey=" + configuration.getApiKey() + params.getUrlQuery();
533+
final String data = "apikey=" + configuration.getApiKey() + parameters.getUrlQuery();
539534

540535
final HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();
541536
httpURLConnection.setDoOutput(true);
@@ -545,7 +540,7 @@ private Document post(final String callName, final String callPrefix, final Para
545540
outputStream.write(data.getBytes(Charset.forName("UTF-8")));
546541
outputStream.close();
547542

548-
return doRequest(httpURLConnection, params.getOutputMode());
543+
return doRequest(httpURLConnection, parameters);
549544

550545
} catch(IOException e) {
551546
throw new AlchemyApiException(e);
@@ -554,74 +549,62 @@ private Document post(final String callName, final String callPrefix, final Para
554549

555550
// TODO add json handling
556551
// TODO return pojo with parsed field, but allow a "raw" xml/json getter to protect against api updates
557-
private Document doRequest(final HttpURLConnection httpURLConnection, final String outputMode) {
552+
private Document doRequest(final HttpURLConnection httpURLConnection, final Parameters parameters) {
558553
try {
559-
final DataInputStream inputStream = new DataInputStream(httpURLConnection.getInputStream());
560-
final Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(inputStream);
561-
562-
inputStream.close();
554+
final String response = IOUtils.toString(httpURLConnection.getInputStream());
563555
httpURLConnection.disconnect();
564556

565-
switch (outputMode) {
557+
switch (parameters.getOutputMode()) {
566558
case Parameters.OUTPUT_XML:
567-
return parseXml(document);
559+
return parseXml(response, parameters);
568560

569561
case Parameters.OUTPUT_RDF:
570-
return praseRdf(document);
562+
return praseRdf(response, parameters);
571563

572564
case Parameters.OUTPUT_JSON:
573565
throw new AlchemyApiException("Json Response not supported yet");
574-
}
575-
return document;
576566

577-
} catch (SAXException | ParserConfigurationException | IOException e) {
567+
default:
568+
throw new AlchemyApiException("Unknown output mode, must be one of [xml,rdf,json]");
569+
}
570+
} catch (IOException e) {
578571
throw new AlchemyApiException(e);
579572
}
580573
}
581574

582-
private Document parseXml(final Document document) {
583-
final XPathFactory factory = XPathFactory.newInstance();
584-
final String status = getNodeValue(factory, document, "/results/status/text()");
585-
if (isBlank(status) || !status.equals("OK")) {
586-
final String statusInfo = getNodeValue(factory, document, "/results/statusInfo/text()");
587-
if (isNotBlank(statusInfo)) {
588-
throw new AlchemyApiException("Error making API call: " + statusInfo);
589-
}
590-
throw new AlchemyApiException("Error making API call: " + status);
591-
}
592-
return document;
593-
}
575+
private Document parseXml(final String response, final Parameters parameters) {
576+
final Document document = Jsoup.parse(response, parameters.getEncoding(), Parser.xmlParser());
594577

595-
private Document praseRdf(final Document document) {
596-
final XPathFactory factory = XPathFactory.newInstance();
597-
final String status = getNodeValue(factory, document, "//RDF/Description/ResultStatus/text()");
598-
if (isBlank(status) || !status.equals("OK")) {
599-
final String statusInfo = getNodeValue(factory, document, "//RDF/Description/ResultStatus/text()");
600-
if (isNotBlank(statusInfo)) {
578+
final Element status = document.select("results > status").first();
579+
if (status == null || !status.text().equals("OK")) {
580+
final Element statusInfo = document.select("results > statusInfo").first();
581+
if (statusInfo != null) {
601582
throw new AlchemyApiException("Error making API call: " + statusInfo);
602583
}
603584
throw new AlchemyApiException("Error making API call: " + status);
604585
}
605586
return document;
606587
}
607588

608-
private String getNodeValue(XPathFactory factory, Document doc, String xpathStr) {
609-
try {
610-
final XPath xpath = factory.newXPath();
611-
final XPathExpression expr = xpath.compile(xpathStr);
612-
final Object result = expr.evaluate(doc, XPathConstants.NODESET);
613-
final NodeList results = (NodeList) result;
614-
615-
if(results.getLength() == 0 || results.item(0) == null) { return null; }
616-
return results.item(0).getNodeValue();
589+
// TODO investigate rdf format
590+
private Document praseRdf(final String response, final Parameters parameters) {
591+
final Document document = Jsoup.parse(response, parameters.getEncoding(), Parser.xmlParser());
617592

618-
} catch (XPathExpressionException e) {
619-
throw new AlchemyApiException(e);
593+
final Element status = document.select("RDF > Description > ResultStatus").first();
594+
if (status == null || !status.text().equals("OK")) {
595+
throw new AlchemyApiException("Error making API call: " + status);
620596
}
597+
return document;
621598
}
622599

623600
private String buildBaseApiUrl() {
624601
return API_URL.replace("{SUB_DOMAIN}", configuration.getApiSubDomain());
625602
}
626603

604+
private String parseBaseUrl(final HttpURLConnection httpURLConnection) {
605+
final URL url = httpURLConnection.getURL();
606+
String path = url.getFile().substring(0, url.getFile().lastIndexOf('/'));
607+
return url.getProtocol() + "://" + url.getHost() + path;
608+
}
609+
627610
}

src/main/java/com/alchemyapi/api/parameters/Parameters.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ public class Parameters {
2121
private String text;
2222
private String outputMode = OUTPUT_XML; // TODO make json default
2323
private String customParameters;
24+
private String encoding = "UTF-8";
2425

2526
public String getUrl() {
2627
return url;
@@ -49,6 +50,14 @@ public void setText(String text) {
4950
this.text = text;
5051
}
5152

53+
public String getEncoding() {
54+
return encoding;
55+
}
56+
57+
public void setEncoding(final String encoding) {
58+
this.encoding = encoding;
59+
}
60+
5261
public String getOutputMode() {
5362
return outputMode;
5463
}
src/test/java/com/alchemyapi/STestAuthor.java

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,28 @@
11
package com.alchemyapi;
22

33
import com.alchemyapi.api.AlchemyApi;
4-
import com.alchemyapi.helpers.DocumentUtils;
54
import com.alchemyapi.helpers.ResourceUtils;
65
import com.alchemyapi.helpers.TestApiFactory;
6+
import org.jsoup.nodes.Document;
77
import org.junit.Test;
8-
import org.w3c.dom.Document;
9-
import org.xml.sax.SAXException;
108

11-
import javax.xml.parsers.ParserConfigurationException;
12-
import javax.xml.xpath.XPathExpressionException;
139
import java.io.File;
14-
import java.io.IOException;
1510

1611
public class STestAuthor {
1712

1813
private final AlchemyApi alchemyApi = TestApiFactory.build(new File(System.getProperty("user.home"), ".alchemy/api.key"));
1914

2015
@Test
21-
public void parseFromTestData() throws SAXException, ParserConfigurationException, XPathExpressionException, IOException {
16+
public void parseFromTestData() {
2217
final String html = ResourceUtils.toString("data/example.html");
23-
2418
final Document document = alchemyApi.htmlGetAuthor(html, "http://www.test.com/");
25-
System.out.println(DocumentUtils.toString(document));
19+
System.out.println(document);
2620
}
2721

2822
@Test
29-
public void parseFromUrl() throws SAXException, ParserConfigurationException, XPathExpressionException, IOException {
23+
public void parseFromUrl() {
3024
final Document document = alchemyApi.urlGetAuthor("http://www.politico.com/blogs/media/2012/02/detroit-news-ed-upset-over-romney-edit-115247.html");
31-
System.out.println(DocumentUtils.toString(document));
25+
System.out.println(document);
3226
}
3327

3428
}

src/test/java/com/alchemyapi/STestSentiment.java

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,12 @@
44
import com.alchemyapi.api.parameters.KeywordParameters;
55
import com.alchemyapi.api.parameters.NamedEntityParameters;
66
import com.alchemyapi.api.parameters.TargetedSentimentParameters;
7-
import com.alchemyapi.helpers.DocumentUtils;
87
import com.alchemyapi.helpers.ResourceUtils;
98
import com.alchemyapi.helpers.TestApiFactory;
9+
import org.jsoup.nodes.Document;
1010
import org.junit.Test;
11-
import org.w3c.dom.Document;
12-
import org.xml.sax.SAXException;
1311

14-
import javax.xml.parsers.ParserConfigurationException;
15-
import javax.xml.xpath.XPathExpressionException;
1612
import java.io.File;
17-
import java.io.IOException;
1813

1914
public class STestSentiment {
2015

@@ -23,60 +18,60 @@ public class STestSentiment {
2318
@Test
2419
public void url() {
2520
final Document document = alchemyApi.urlGetTextSentiment("http://www.techcrunch.com/");
26-
System.out.println(DocumentUtils.toString(document));
21+
System.out.println(document);
2722
}
2823

2924
@Test
30-
public void text() throws SAXException, ParserConfigurationException, XPathExpressionException, IOException {
25+
public void text() {
3126
final Document document = alchemyApi.textGetTextSentiment("That hat is ridiculous, Charles.");
32-
System.out.println(DocumentUtils.toString(document));
27+
System.out.println(document);
3328
}
3429

3530
@Test
36-
public void file() throws SAXException, ParserConfigurationException, XPathExpressionException, IOException {
31+
public void file() {
3732
final String html = ResourceUtils.toString("data/example.html");
3833
final Document document = alchemyApi.htmlGetTextSentiment(html, "http://www.test.com/");
39-
System.out.println(DocumentUtils.toString(document));
34+
System.out.println(document);
4035
}
4136

4237
@Test
43-
public void entityTargetedSentimentText() throws SAXException, ParserConfigurationException, XPathExpressionException, IOException {
38+
public void entityTargetedSentimentText() {
4439
final NamedEntityParameters entityParams = new NamedEntityParameters();
4540
entityParams.setSentiment(true);
4641
final Document document = alchemyApi.textGetRankedNamedEntities("That Mike Tyson is such a sweetheart.", entityParams);
47-
System.out.println(DocumentUtils.toString(document));
42+
System.out.println(document);
4843

4944
final KeywordParameters keywordParams = new KeywordParameters();
5045
keywordParams.setSentiment(true);
5146
final Document document2 = alchemyApi.textGetRankedKeywords("That Mike Tyson is such a sweetheart.", keywordParams);
52-
System.out.println(DocumentUtils.toString(document2));
47+
System.out.println(document2);
5348

5449
final TargetedSentimentParameters sentimentParams = new TargetedSentimentParameters();
5550
sentimentParams.setShowSourceText(true);
5651
final Document document3 = alchemyApi.textGetTargetedSentiment("This car is terrible.", "car", sentimentParams);
57-
System.out.print(DocumentUtils.toString(document3));
52+
System.out.print(document3);
5853
}
5954

6055
@Test
61-
public void entityTargetedSentimentUrl() throws SAXException, ParserConfigurationException, XPathExpressionException, IOException {
56+
public void entityTargetedSentimentUrl() {
6257
final TargetedSentimentParameters sentimentParams = new TargetedSentimentParameters();
6358
sentimentParams.setShowSourceText(true);
6459

6560
final Document document = alchemyApi.urlGetTargetedSentiment(
6661
"http://techcrunch.com/2012/03/01/keen-on-anand-rajaraman-how-walmart-wants-to-leapfrog-over-amazon-tctv/",
6762
"Walmart",
6863
sentimentParams);
69-
System.out.print(DocumentUtils.toString(document));
64+
System.out.print(document);
7065
}
7166

7267
@Test
73-
public void entityTargetedSentimentHtml() throws SAXException, ParserConfigurationException, XPathExpressionException, IOException {
68+
public void entityTargetedSentimentHtml() {
7469
final TargetedSentimentParameters sentimentParams = new TargetedSentimentParameters();
7570
sentimentParams.setShowSourceText(true);
7671

7772
final String html = ResourceUtils.toString("data/example.html");
7873
final Document document2 = alchemyApi.htmlGetTargetedSentiment(html, "http://www.test.com/", "WujWuj", sentimentParams);
79-
System.out.print(DocumentUtils.toString(document2));
74+
System.out.print(document2);
8075
}
8176

8277
}

src/test/java/com/alchemyapi/helpers/DocumentUtils.java

Lines changed: 0 additions & 35 deletions
This file was deleted.

0 commit comments

Comments
 (0)