diff --git a/indexing/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/IndexingServiceImpl.java b/indexing/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/IndexingServiceImpl.java index 0733d20..cc450c6 100644 --- a/indexing/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/IndexingServiceImpl.java +++ b/indexing/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/IndexingServiceImpl.java @@ -59,9 +59,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; -import com.google.common.escape.Escaper; import com.google.common.io.ByteStreams; -import com.google.common.net.UrlEscapers; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.MoreExecutors; @@ -81,8 +79,6 @@ import com.google.enterprise.cloudsearch.sdk.config.Configuration; import java.io.IOException; import java.io.InputStream; -import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; import java.security.GeneralSecurityException; import java.util.ArrayList; import java.util.Arrays; @@ -148,7 +144,6 @@ public class IndexingServiceImpl extends BaseApiService implements public static final int DEFAULT_CONTENT_UPLOAD_THRESHOLD_BYTES = 100000; // ~100KB public static final Set API_SCOPES = Collections.singleton("https://www.googleapis.com/auth/cloud_search"); - private static final Escaper URL_PATH_SEGMENT_ESCAPER = UrlEscapers.urlPathSegmentEscaper(); private static final RequestMode DEFAULT_REQUEST_MODE = RequestMode.SYNCHRONOUS; private final String sourceId; @@ -1064,18 +1059,6 @@ private String getItemResourceName(String name) { return String.format(ITEM_RESOURCE_NAME_FORMAT, sourceId, escapeResourceName(name)); } - private static String escapeResourceName(String name) { - return URL_PATH_SEGMENT_ESCAPER.escape(name); - } - - private static String decodeResourceName(String name) { - try { - return URLDecoder.decode(name.replace("+", "%2B"), "UTF-8"); - } catch (UnsupportedEncodingException e) { - throw new IllegalArgumentException("unable to decode resource name " + name, e); - } - } - private void removeResourcePrefix(Item item) { checkNotNull(item); checkArgument(!Strings.isNullOrEmpty(item.getName())); diff --git a/indexing/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/IndexingServiceTest.java b/indexing/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/IndexingServiceTest.java index 1dba7d0..14c6531 100644 --- a/indexing/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/IndexingServiceTest.java +++ b/indexing/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/IndexingServiceTest.java @@ -1037,7 +1037,7 @@ public void pushItem_escapesResourceName() throws IOException, InterruptedExcept verify(quotaServer).acquire(Operations.DEFAULT); verify(batchingService).pushItem(pushCaptor.capture()); - assertThat(pushCaptor.getValue().getName(), endsWith("/a+b%2Fc")); + assertThat(pushCaptor.getValue().getName(), endsWith("/a%2Bb%2Fc")); } @Test diff --git a/indexing/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/template/RepositoryDocTest.java b/indexing/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/template/RepositoryDocTest.java index b67ae59..5e5ff59 100644 --- a/indexing/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/template/RepositoryDocTest.java +++ b/indexing/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/template/RepositoryDocTest.java @@ -261,7 +261,7 @@ public void testItemContentAndChildLinks() throws IOException, InterruptedExcept .setItem(item) .setContent(content, ContentFormat.TEXT) .addChildId("id1", pushItem1) - .addChildId("id2", pushItem2) + .addChildId("id2+b;more", pushItem2) .build(); SettableFuture updateFuture = SettableFuture.create(); @@ -286,7 +286,7 @@ public void testItemContentAndChildLinks() throws IOException, InterruptedExcept verify(mockIndexingService) .indexItemAndContent(item, content, null, ContentFormat.TEXT, RequestMode.UNSPECIFIED); verify(mockIndexingService).push("id1", pushItem1); - verify(mockIndexingService).push("id2", pushItem2); + verify(mockIndexingService).push("id2+b;more", pushItem2); } @Test diff --git a/it/src/main/java/com/google/enterprise/cloudsearch/sdk/Util.java b/it/src/main/java/com/google/enterprise/cloudsearch/sdk/Util.java index 75d0e7d..77c0915 100644 --- a/it/src/main/java/com/google/enterprise/cloudsearch/sdk/Util.java +++ b/it/src/main/java/com/google/enterprise/cloudsearch/sdk/Util.java @@ -35,7 +35,10 @@ public static String getRandomId() { } public static String getItemId(String sourceId, String name) { - return "datasources/" + sourceId + "/items/" + name; + return "datasources/" + sourceId + "/items/" + BaseApiService.escapeResourceName(name); } + public static String unescapeItemName(String name) { + return BaseApiService.decodeResourceName(name); + } } diff --git a/it/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/MockItem.java b/it/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/MockItem.java index 76a871c..dcd7704 100644 --- a/it/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/MockItem.java +++ b/it/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/MockItem.java @@ -61,7 +61,7 @@ public MockItem(String itemName, Multimap values) { } public Item getItem() { - Item item = new IndexingItemBuilder(name) + return new IndexingItemBuilder(name) .setValues(values) .setTitle(FieldOrValue.withField(TITLE)) .setSourceRepositoryUrl(FieldOrValue.withField(URL)) @@ -78,7 +78,6 @@ public Item getItem() { .setPayload(getByteArrayValue(values, PAYLOAD)) .setAcl(MockItem.getSingleValue(values, ACL).orElse(null)) .build(); - return item; } private static ItemType getItemType(Multimap values) { diff --git a/it/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/TestUtils.java b/it/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/TestUtils.java index 3062cd3..7351221 100644 --- a/it/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/TestUtils.java +++ b/it/src/main/java/com/google/enterprise/cloudsearch/sdk/indexing/TestUtils.java @@ -15,14 +15,19 @@ */ package com.google.enterprise.cloudsearch.sdk.indexing; +import static com.google.enterprise.cloudsearch.sdk.Util.unescapeItemName; import static java.net.HttpURLConnection.HTTP_NOT_FOUND; import static org.junit.Assert.assertEquals; import com.google.api.client.googleapis.json.GoogleJsonResponseException; +import com.google.api.client.json.jackson2.JacksonFactory; import com.google.api.services.cloudsearch.v1.model.Item; +import com.google.api.services.cloudsearch.v1.model.ItemMetadata; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.awaitility.Awaitility; import org.awaitility.Duration; @@ -79,16 +84,46 @@ public void waitUntilDeleted(String itemId) { } /** - * Asserts that the expected and the actual items match. + * Asserts that the expected and the actual items match. Tests tend to use this by + * passing in an Item constructed in the test class to compare with an Item from the + * server. Tests have an option of setting the name to a fully-qualified name + * (datasources/id/items/name) or just the name. This comparison compares the item + * names, ignoring any prefixes. * * @throws AssertionError - if the items don't match. */ private void assertItemsMatch(Item expected, Item actual) { - logger.log(Level.INFO, "Verifying item {0}...", actual); + logger.log(Level.INFO, "Comparing item \n{0} \nto \n{1}", new Object[] {expected, actual}); + // TODO(lchandramouli): verify all applicable meta data assertEquals("ACCEPTED", actual.getStatus().getCode()); - assertEquals(expected.getItemType(), actual.getItemType()); - assertEquals(expected.getMetadata(), actual.getMetadata()); - assertEquals(expected.getName(), actual.getName()); + assertEquals("name", getItemName(expected.getName()), getItemName(actual.getName())); + assertEquals("item type", expected.getItemType(), actual.getItemType()); + + ItemMetadata expectedMetadata = expected.getMetadata(); + ItemMetadata actualMetadata = actual.getMetadata(); + if (!expectedMetadata.equals(actualMetadata)) { + // toString() produces different output (expected does not contain quotes, actual + // does), so set a JSON factory here so assertEquals can highlight the differences. + expectedMetadata.setFactory(JacksonFactory.getDefaultInstance()); + assertEquals(expectedMetadata.toString(), actualMetadata.toString()); + } + } + + private static final Pattern NAME_PATTERN = Pattern.compile("^datasources/[^/]+/items/([^/]+)$"); + + /** + * For a fully-qualified id (datasources/sourceid/items/name), returns the name + * portion, unescaped, otherwise returns the name. + * + * @param name an item name + */ + private String getItemName(String name) { + Matcher matcher = NAME_PATTERN.matcher(name); + if (matcher.matches()) { + name = matcher.group(1); + return unescapeItemName(name); + } + return name; } } diff --git a/it/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/FakeIndexingRepositoryIT.java b/it/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/FakeIndexingRepositoryIT.java index d62e714..b9930e1 100644 --- a/it/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/FakeIndexingRepositoryIT.java +++ b/it/src/test/java/com/google/enterprise/cloudsearch/sdk/indexing/FakeIndexingRepositoryIT.java @@ -20,6 +20,7 @@ import static com.google.enterprise.cloudsearch.sdk.Util.PUBLIC_ACL; import static com.google.enterprise.cloudsearch.sdk.Util.getRandomId; import static java.util.Arrays.asList; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; @@ -192,8 +193,12 @@ public void fullTraversalOnePageTest() throws InterruptedException, IOException FakeIndexingRepository mockRepo = new FakeIndexingRepository.Builder() .addPage(asList(item)) .build(); - runAwaitFullTraversalConnector(mockRepo, setupConfiguration(new Properties())); - testUtils.waitUntilEqual(itemId, item.getItem()); + try { + runAwaitFullTraversalConnector(mockRepo, setupConfiguration(new Properties())); + testUtils.waitUntilEqual(itemId, item.getItem()); + } finally { + v1Client.deleteItemsIfExist(asList(itemId)); + } } @Test @@ -218,9 +223,13 @@ public void fullTraversalTwoPagesTest() throws InterruptedException, IOException .addPage(asList(itemPdf)) .addPage(asList(itemHtm)) .build(); - runAwaitFullTraversalConnector(mockRepo, setupConfiguration(new Properties())); - testUtils.waitUntilEqual(pdfItemId, itemPdf.getItem()); - testUtils.waitUntilEqual(htmItemId, itemHtm.getItem()); + try { + runAwaitFullTraversalConnector(mockRepo, setupConfiguration(new Properties())); + testUtils.waitUntilEqual(pdfItemId, itemPdf.getItem()); + testUtils.waitUntilEqual(htmItemId, itemHtm.getItem()); + } finally { + v1Client.deleteItemsIfExist(asList(pdfItemId, htmItemId)); + } } @Test @@ -770,6 +779,36 @@ public void incrementalChanges_deleteItem_succeeds() throws Exception { } } + @Test + public void indexing_charactersInId_succeeds() throws InterruptedException, IOException { + // Test all printable ASCII characters, a Latin-1 character, and a larger code point. + StringBuilder idBuilder = new StringBuilder("\u00f6\u20ac"); // o-umlaut Euro + for (int i = 32; i < 127; i++) { + idBuilder.append((char) i); + } + // This id is not qualified with "datasources//items/". It's intended to match + // what a typical Repository would use when creating an Item. + String rawItemId = "ItemIdTest_" + idBuilder.toString() + "_" + getRandomId(); + // Note: not local method getItemId; that one will add another random number to the end + String escapedFullId = Util.getItemId(indexingSourceId, rawItemId); + MockItem mockItem = new MockItem.Builder(rawItemId) + .setTitle("Item Id Test") + .setContentLanguage("en-us") + .setItemType(ItemType.CONTENT_ITEM.toString()) + .setAcl(PUBLIC_ACL) + .build(); + FakeIndexingRepository mockRepo = new FakeIndexingRepository.Builder() + .addPage(asList(mockItem)) + .build(); + + try { + runAwaitFullTraversalConnector(mockRepo, setupConfiguration(new Properties())); + testUtils.waitUntilEqual(escapedFullId, mockItem.getItem()); + } finally { + v1Client.deleteItemsIfExist(asList(escapedFullId)); + } + } + private void verifyStructuredData(String itemId, String schemaObjectType, Item expectedItem) throws IOException { Item actualItem = v1Client.getItem(itemId); @@ -790,6 +829,7 @@ private void runAwaitFullTraversalConnector(FakeIndexingRepository mockRepo, Str } private String getItemId(String itemName) { + // Util.getItemId returns datasources//items/ return Util.getItemId(indexingSourceId, itemName) + getRandomId(); } } diff --git a/sdk/src/main/java/com/google/enterprise/cloudsearch/sdk/BaseApiService.java b/sdk/src/main/java/com/google/enterprise/cloudsearch/sdk/BaseApiService.java index 7b02a73..0545712 100644 --- a/sdk/src/main/java/com/google/enterprise/cloudsearch/sdk/BaseApiService.java +++ b/sdk/src/main/java/com/google/enterprise/cloudsearch/sdk/BaseApiService.java @@ -39,11 +39,15 @@ import com.google.api.client.util.Beta; import com.google.api.client.util.ExponentialBackOff; import com.google.api.client.util.FieldInfo; +import com.google.api.client.util.escape.Escaper; +import com.google.api.client.util.escape.PercentEscaper; import com.google.common.base.Strings; import com.google.common.util.concurrent.AbstractIdleService; import com.google.enterprise.cloudsearch.sdk.StatsManager.OperationStats; import com.google.enterprise.cloudsearch.sdk.StatsManager.OperationStats.Event; +import java.net.URLDecoder; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.security.GeneralSecurityException; import java.util.Collection; import java.util.Collections; @@ -412,6 +416,31 @@ private static Object setDefaultValuesForPrimitiveTypes(Object object) { return object; } + /** + * This escaper behaves the same as CharEscapers.escapeUriPath, + * except that semi-colons are also escaped, and the same as + * UrlEscapers.urlPathSegmentEscaper, except that semi-colons and + * plus-signs are also escaped. Both characters cause problems + * somewhere in the Google API Client for Java. + * + * @see com.google.api.client.util.escape.CharEscapers#escapeUriPath + * @see com.google.common.net.UrlEscapers#urlPathSegmentEscaper + */ + private static final Escaper URL_PATH_SEGMENT_ESCAPER = + new PercentEscaper(".-~_@:!$&'()*,=", false); + + protected static String escapeResourceName(String name) { + return URL_PATH_SEGMENT_ESCAPER.escape(name); + } + + protected static String decodeResourceName(String name) { + try { + return URLDecoder.decode(name.replace("+", "%2B"), "UTF-8"); + } catch (UnsupportedEncodingException e) { + throw new IllegalArgumentException("unable to decode resource name " + name, e); + } + } + /** * Common execute method for all api requests. *