Skip to content
This repository was archived by the owner on May 15, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.escape.Escaper;
import com.google.common.io.ByteStreams;
import com.google.common.net.UrlEscapers;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.MoreExecutors;
Expand All @@ -81,8 +79,6 @@
import com.google.enterprise.cloudsearch.sdk.config.Configuration;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.security.GeneralSecurityException;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -148,7 +144,6 @@ public class IndexingServiceImpl extends BaseApiService<CloudSearch> implements
public static final int DEFAULT_CONTENT_UPLOAD_THRESHOLD_BYTES = 100000; // ~100KB
public static final Set<String> API_SCOPES =
Collections.singleton("https://www.googleapis.com/auth/cloud_search");
private static final Escaper URL_PATH_SEGMENT_ESCAPER = UrlEscapers.urlPathSegmentEscaper();
private static final RequestMode DEFAULT_REQUEST_MODE = RequestMode.SYNCHRONOUS;

private final String sourceId;
Expand Down Expand Up @@ -1064,18 +1059,6 @@ private String getItemResourceName(String name) {
return String.format(ITEM_RESOURCE_NAME_FORMAT, sourceId, escapeResourceName(name));
}

private static String escapeResourceName(String name) {
return URL_PATH_SEGMENT_ESCAPER.escape(name);
}

private static String decodeResourceName(String name) {
try {
return URLDecoder.decode(name.replace("+", "%2B"), "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new IllegalArgumentException("unable to decode resource name " + name, e);
}
}

private void removeResourcePrefix(Item item) {
checkNotNull(item);
checkArgument(!Strings.isNullOrEmpty(item.getName()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1037,7 +1037,7 @@ public void pushItem_escapesResourceName() throws IOException, InterruptedExcept

verify(quotaServer).acquire(Operations.DEFAULT);
verify(batchingService).pushItem(pushCaptor.capture());
assertThat(pushCaptor.getValue().getName(), endsWith("/a+b%2Fc"));
assertThat(pushCaptor.getValue().getName(), endsWith("/a%2Bb%2Fc"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ public void testItemContentAndChildLinks() throws IOException, InterruptedExcept
.setItem(item)
.setContent(content, ContentFormat.TEXT)
.addChildId("id1", pushItem1)
.addChildId("id2", pushItem2)
.addChildId("id2+b;more", pushItem2)
.build();

SettableFuture<Item> updateFuture = SettableFuture.create();
Expand All @@ -286,7 +286,7 @@ public void testItemContentAndChildLinks() throws IOException, InterruptedExcept
verify(mockIndexingService)
.indexItemAndContent(item, content, null, ContentFormat.TEXT, RequestMode.UNSPECIFIED);
verify(mockIndexingService).push("id1", pushItem1);
verify(mockIndexingService).push("id2", pushItem2);
verify(mockIndexingService).push("id2+b;more", pushItem2);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ public static String getRandomId() {
}

public static String getItemId(String sourceId, String name) {
return "datasources/" + sourceId + "/items/" + name;
return "datasources/" + sourceId + "/items/" + BaseApiService.escapeResourceName(name);
}

public static String unescapeItemName(String name) {
return BaseApiService.decodeResourceName(name);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public MockItem(String itemName, Multimap<String, Object> values) {
}

public Item getItem() {
Item item = new IndexingItemBuilder(name)
return new IndexingItemBuilder(name)
.setValues(values)
.setTitle(FieldOrValue.withField(TITLE))
.setSourceRepositoryUrl(FieldOrValue.withField(URL))
Expand All @@ -78,7 +78,6 @@ public Item getItem() {
.setPayload(getByteArrayValue(values, PAYLOAD))
.setAcl(MockItem.<Acl>getSingleValue(values, ACL).orElse(null))
.build();
return item;
}

private static ItemType getItemType(Multimap<String, Object> values) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,19 @@
*/
package com.google.enterprise.cloudsearch.sdk.indexing;

import static com.google.enterprise.cloudsearch.sdk.Util.unescapeItemName;
import static java.net.HttpURLConnection.HTTP_NOT_FOUND;
import static org.junit.Assert.assertEquals;

import com.google.api.client.googleapis.json.GoogleJsonResponseException;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.cloudsearch.v1.model.Item;
import com.google.api.services.cloudsearch.v1.model.ItemMetadata;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.awaitility.Awaitility;
import org.awaitility.Duration;

Expand Down Expand Up @@ -79,16 +84,46 @@ public void waitUntilDeleted(String itemId) {
}

/**
* Asserts that the expected and the actual items match.
* Asserts that the expected and the actual items match. Tests tend to use this by
* passing in an Item constructed in the test class to compare with an Item from the
* server. Tests have an option of setting the name to a fully-qualified name
* (datasources/id/items/name) or just the name. This comparison compares the item
* names, ignoring any prefixes.
*
* @throws AssertionError - if the items don't match.
*/
private void assertItemsMatch(Item expected, Item actual) {
logger.log(Level.INFO, "Verifying item {0}...", actual);
logger.log(Level.INFO, "Comparing item \n{0} \nto \n{1}", new Object[] {expected, actual});

// TODO(lchandramouli): verify all applicable meta data
assertEquals("ACCEPTED", actual.getStatus().getCode());
assertEquals(expected.getItemType(), actual.getItemType());
assertEquals(expected.getMetadata(), actual.getMetadata());
assertEquals(expected.getName(), actual.getName());
assertEquals("name", getItemName(expected.getName()), getItemName(actual.getName()));
assertEquals("item type", expected.getItemType(), actual.getItemType());

ItemMetadata expectedMetadata = expected.getMetadata();
ItemMetadata actualMetadata = actual.getMetadata();
if (!expectedMetadata.equals(actualMetadata)) {
// toString() produces different output (expected does not contain quotes, actual
// does), so set a JSON factory here so assertEquals can highlight the differences.
expectedMetadata.setFactory(JacksonFactory.getDefaultInstance());
assertEquals(expectedMetadata.toString(), actualMetadata.toString());
}
}

private static final Pattern NAME_PATTERN = Pattern.compile("^datasources/[^/]+/items/([^/]+)$");

/**
* For a fully-qualified id (datasources/sourceid/items/name), returns the name
* portion, unescaped, otherwise returns the name.
*
* @param name an item name
*/
private String getItemName(String name) {
Matcher matcher = NAME_PATTERN.matcher(name);
if (matcher.matches()) {
name = matcher.group(1);
return unescapeItemName(name);
}
return name;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static com.google.enterprise.cloudsearch.sdk.Util.PUBLIC_ACL;
import static com.google.enterprise.cloudsearch.sdk.Util.getRandomId;
import static java.util.Arrays.asList;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
Expand Down Expand Up @@ -192,8 +193,12 @@ public void fullTraversalOnePageTest() throws InterruptedException, IOException
FakeIndexingRepository mockRepo = new FakeIndexingRepository.Builder()
.addPage(asList(item))
.build();
runAwaitFullTraversalConnector(mockRepo, setupConfiguration(new Properties()));
testUtils.waitUntilEqual(itemId, item.getItem());
try {
runAwaitFullTraversalConnector(mockRepo, setupConfiguration(new Properties()));
testUtils.waitUntilEqual(itemId, item.getItem());
} finally {
v1Client.deleteItemsIfExist(asList(itemId));
}
}

@Test
Expand All @@ -218,9 +223,13 @@ public void fullTraversalTwoPagesTest() throws InterruptedException, IOException
.addPage(asList(itemPdf))
.addPage(asList(itemHtm))
.build();
runAwaitFullTraversalConnector(mockRepo, setupConfiguration(new Properties()));
testUtils.waitUntilEqual(pdfItemId, itemPdf.getItem());
testUtils.waitUntilEqual(htmItemId, itemHtm.getItem());
try {
runAwaitFullTraversalConnector(mockRepo, setupConfiguration(new Properties()));
testUtils.waitUntilEqual(pdfItemId, itemPdf.getItem());
testUtils.waitUntilEqual(htmItemId, itemHtm.getItem());
} finally {
v1Client.deleteItemsIfExist(asList(pdfItemId, htmItemId));
}
}

@Test
Expand Down Expand Up @@ -770,6 +779,36 @@ public void incrementalChanges_deleteItem_succeeds() throws Exception {
}
}

@Test
public void indexing_charactersInId_succeeds() throws InterruptedException, IOException {
// Test all printable ASCII characters, a Latin-1 character, and a larger code point.
StringBuilder idBuilder = new StringBuilder("\u00f6\u20ac"); // o-umlaut Euro
for (int i = 32; i < 127; i++) {
idBuilder.append((char) i);
}
// This id is not qualified with "datasources/<id>/items/". It's intended to match
// what a typical Repository would use when creating an Item.
String rawItemId = "ItemIdTest_" + idBuilder.toString() + "_" + getRandomId();
// Note: not local method getItemId; that one will add another random number to the end
String escapedFullId = Util.getItemId(indexingSourceId, rawItemId);
MockItem mockItem = new MockItem.Builder(rawItemId)
.setTitle("Item Id Test")
.setContentLanguage("en-us")
.setItemType(ItemType.CONTENT_ITEM.toString())
.setAcl(PUBLIC_ACL)
.build();
FakeIndexingRepository mockRepo = new FakeIndexingRepository.Builder()
.addPage(asList(mockItem))
.build();

try {
runAwaitFullTraversalConnector(mockRepo, setupConfiguration(new Properties()));
testUtils.waitUntilEqual(escapedFullId, mockItem.getItem());
} finally {
v1Client.deleteItemsIfExist(asList(escapedFullId));
}
}

private void verifyStructuredData(String itemId, String schemaObjectType,
Item expectedItem) throws IOException {
Item actualItem = v1Client.getItem(itemId);
Expand All @@ -790,6 +829,7 @@ private void runAwaitFullTraversalConnector(FakeIndexingRepository mockRepo, Str
}

private String getItemId(String itemName) {
// Util.getItemId returns datasources/<source>/items/<itemname>
return Util.getItemId(indexingSourceId, itemName) + getRandomId();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,15 @@
import com.google.api.client.util.Beta;
import com.google.api.client.util.ExponentialBackOff;
import com.google.api.client.util.FieldInfo;
import com.google.api.client.util.escape.Escaper;
import com.google.api.client.util.escape.PercentEscaper;
import com.google.common.base.Strings;
import com.google.common.util.concurrent.AbstractIdleService;
import com.google.enterprise.cloudsearch.sdk.StatsManager.OperationStats;
import com.google.enterprise.cloudsearch.sdk.StatsManager.OperationStats.Event;
import java.net.URLDecoder;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.security.GeneralSecurityException;
import java.util.Collection;
import java.util.Collections;
Expand Down Expand Up @@ -412,6 +416,31 @@ private static Object setDefaultValuesForPrimitiveTypes(Object object) {
return object;
}

/**
* This escaper behaves the same as CharEscapers.escapeUriPath,
* except that semi-colons are also escaped, and the same as
* UrlEscapers.urlPathSegmentEscaper, except that semi-colons and
* plus-signs are also escaped. Both characters cause problems
* somewhere in the Google API Client for Java.
*
* @see com.google.api.client.util.escape.CharEscapers#escapeUriPath
* @see com.google.common.net.UrlEscapers#urlPathSegmentEscaper
*/
private static final Escaper URL_PATH_SEGMENT_ESCAPER =
new PercentEscaper(".-~_@:!$&'()*,=", false);

protected static String escapeResourceName(String name) {
return URL_PATH_SEGMENT_ESCAPER.escape(name);
}

protected static String decodeResourceName(String name) {
try {
return URLDecoder.decode(name.replace("+", "%2B"), "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new IllegalArgumentException("unable to decode resource name " + name, e);
}
}

/**
* Common execute method for all api requests.
*
Expand Down