Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
import java.sql.SQLException;
import java.sql.Statement;
import java.text.Normalizer;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
Expand Down Expand Up @@ -48,8 +46,13 @@ public class IndexAuthorityRecords {
private static EnumSet<HeadingType> authorTypes = EnumSet.of(
HeadingType.PERSNAME, HeadingType.CORPNAME, HeadingType.EVENT);

private static final String ARG_INDEX_ALL = "--index-all";
private static final String ARG_SETUP_DB = "--setup-db";
protected static final String ARG_INDEX_ALL = "--index-all";
protected static final String ARG_SETUP_DB = "--setup-db";
protected static final int NULL_LCCN = -1;
protected static final int UNRECOGNIZED_SOURCE = -2;
protected static final String MIN_YEAR_WEEK = "00.00";
protected static final String MAX_YEAR_WEEK = "50.01";
protected static final String IndexAuthorityRecordsCursorName = "index_authority_records";

public static void main(String[] args)
throws FileNotFoundException, IOException, SQLException {
Expand All @@ -70,13 +73,14 @@ protected static String indexAllAuthorityRecords(Config config, boolean setupDb)
try ( Connection authority = config.getDatabaseConnection("Authority");
Connection headings = config.getDatabaseConnection("Headings") ) {

String maxModdate = getMaxModDate(authority);
Set<String> identifiers = getAllIdentifiers(authority);

//set up database (including populating description maps)
if (setupDb)
setUpDatabase(headings);

// This assumes authorityUpdate contains at least one record dated after 2000.
String maxYearWeek = getMaxYearWeek(authority, MIN_YEAR_WEEK);
Set<String> identifiers = getAllIdentifiers(authority);

for (String identifier : identifiers) {
MarcRecord rec = getMostRecentRecord(authority, identifier);
if (rec == null) continue;
Expand All @@ -89,10 +93,15 @@ protected static String indexAllAuthorityRecords(Config config, boolean setupDb)
continue;
}

processAuthorityMarc( headings, rec );
try {
processAuthorityMarc( headings, rec );
} catch (Exception ex) {
System.out.println("ERROR: Exception encountered while processing " + rec.id);
System.out.println(ex);
}
}

return updateCursor(headings, maxModdate);
return updateCursor(headings, maxYearWeek);
}
}

Expand All @@ -102,30 +111,39 @@ protected static String indexNewAuthorityRecords(Config config) throws IOExcepti
Connection headings = config.getDatabaseConnection("Headings") ) {

String cursor = getCursor(headings);
String maxModdate = getMaxModDate(authority);
String maxYearWeek = getMaxYearWeek(authority, cursor);
if (maxYearWeek.equalsIgnoreCase(cursor))
return cursor;

Set<String> identifiers = getNewIdentifiers(authority, cursor);

headings.setAutoCommit(false);
for (String identifier : identifiers) {
MarcRecord rec = getMostRecentRecord(authority, identifier);
if (rec == null) continue;
try {
removeExistingAuthorityRecord(headings, rec);

String heading = null;
for (DataField f : rec.dataFields) if (f.tag.startsWith("1"))
heading = nativeHeading(f);
Character recordStatus = rec.leader.charAt(5);
if ( recordStatus.equals('d') || recordStatus.equals('o')) {
System.out.format("%s %s deleted\n", rec.id, heading);
headings.commit();
continue;
}

removeExistingAuthorityRecord(headings, rec);

String heading = null;
for (DataField f : rec.dataFields) if (f.tag.startsWith("1"))
heading = nativeHeading(f);
Character recordStatus = rec.leader.charAt(5);
if ( recordStatus.equals('d') || recordStatus.equals('o')) {
System.out.format("%s %s deleted\n", rec.id, heading);
continue;
processAuthorityMarc( headings, rec );
headings.commit();
} catch (Exception ex) {
System.out.println("ERROR: Exception encountered while processing " + rec.id);
System.out.println(ex);
headings.rollback();
}

processAuthorityMarc( headings, rec );
headings.commit();
}

cursor = updateCursor(headings, maxModdate);
cursor = updateCursor(headings, maxYearWeek);
headings.commit();

return cursor;
Expand All @@ -134,7 +152,7 @@ protected static String indexNewAuthorityRecords(Config config) throws IOExcepti

protected static MarcRecord getMostRecentRecord(Connection authority, String identifier) throws SQLException {
try ( PreparedStatement getAuthStmt = authority.prepareStatement(
"SELECT marc21 FROM authorityUpdate WHERE id = ? ORDER BY moddate DESC, updateFile DESC LIMIT 1")) {
"SELECT marc21 FROM authorityUpdate WHERE id = ? ORDER BY updateFile DESC LIMIT 1")) {
getAuthStmt.setString(1, identifier);
try (ResultSet rs = getAuthStmt.executeQuery()) {
while (rs.next())
Expand Down Expand Up @@ -173,29 +191,40 @@ protected static Set<String> getAllIdentifiers(Connection authority) throws SQLE
return identifiers;
}

/**
 * Returns the calendar day immediately before the given date.
 *
 * @param inputDateString a date written as {@code yyyy-MM-dd}
 * @return the previous day, formatted with the same {@code yyyy-MM-dd} pattern
 */
protected static String subtractOneDay(String inputDateString) {
final DateTimeFormatter dayPattern = DateTimeFormatter.ofPattern("yyyy-MM-dd");
// Parse, step back one day, and render with the same pattern used for parsing.
return LocalDate.parse(inputDateString, dayPattern).minusDays(1).format(dayPattern);
}

/**
 * Reads the stored value of the 'index_authority_records' cursor from the
 * headingsUpdateCursor table.
 *
 * @param headings connection used to query headingsUpdateCursor
 * @return the current_to_date value of the first matching row; as written, the
 *         first check below passes that value through subtractOneDay before returning it
 * @throws SQLException if the query yields no row for the cursor, or on a database error
 */
protected static String getCursor(Connection headings) throws SQLException {
try (Statement stmt = headings.createStatement();
ResultSet rs = stmt.executeQuery("SELECT current_to_date FROM headingsUpdateCursor WHERE cursor_name = 'index_authority_records'")) {
// NOTE(review): two consecutive rs.next() checks follow. They look like the old
// (subtractOneDay) and new (raw value) versions of the same line; as written, the
// second check is only reached when the first rs.next() returned false. Confirm
// which single return is intended.
if (rs.next()) return subtractOneDay(rs.getString(1));
if (rs.next()) return rs.getString(1);

// No row was found for the cursor name.
throw new SQLException("headingsUpdateCursor table is empty!");
}
}

protected static String getMaxModDate(Connection authority) throws SQLException {
try (PreparedStatement pstmt = authority.prepareStatement("SELECT MAX(moddate) as maxModDate FROM authorityUpdate");
ResultSet rs = pstmt.executeQuery()) {
if (rs.next()) return rs.getString(1);
/*
* Finds the maximum year-week among the updateFile values in authorityUpdate.
* The maximum is looked up separately for the "unname" and "unsub" update files, and the larger of the two is returned.
* If no data newer than previousCursor is found, previousCursor is returned.
*/
protected static String getMaxYearWeek(Connection authority, String previousCursor) throws SQLException {
String sql = "SELECT MAX(updateFile) FROM authorityUpdate WHERE updateFile > ? and updateFile < ?";
String newMax = previousCursor;
try (PreparedStatement pstmt = authority.prepareStatement(sql)) {
for (String columnPrefix : Arrays.asList("unname", "unsub")) {
pstmt.setString(1, columnPrefix + previousCursor);
pstmt.setString(2, columnPrefix + MAX_YEAR_WEEK);
ResultSet rs = pstmt.executeQuery();
if (rs.next()) {
String thisMax = rs.getString(1);
if (thisMax == null)
continue;
String thisYearWeek = thisMax.substring(thisMax.length() - 5);
if (thisYearWeek.compareToIgnoreCase(newMax) > 0)
newMax = thisYearWeek;
}
}
}

throw new SQLException("Shouldn't get here when getting max moddate!");
return newMax;
}

protected static String updateCursor(Connection headings, String cursor) throws SQLException {
Expand All @@ -208,11 +237,16 @@ protected static String updateCursor(Connection headings, String cursor) throws

protected static Set<String> getNewIdentifiers(Connection authority, String cursor) throws SQLException {
Set<String> identifiers = new TreeSet<>();
try (PreparedStatement pstmt = authority.prepareStatement("SELECT DISTINCT id FROM authorityUpdate WHERE moddate > ?")) {
pstmt.setString(1, cursor);
try (ResultSet rs = pstmt.executeQuery()) {
while (rs.next()) {
identifiers.add(rs.getString(1));
String sql = "SELECT DISTINCT id FROM authorityUpdate WHERE updateFile > ? AND updateFile < ?";
try (PreparedStatement pstmt = authority.prepareStatement(sql)) {
for (String columnPrefix : Arrays.asList("unname", "unsub")) {
String min = columnPrefix + cursor;
String max = columnPrefix + MAX_YEAR_WEEK;
pstmt.setString(1, min);
pstmt.setString(2, max);
try (ResultSet rs = pstmt.executeQuery()) {
while (rs.next())
identifiers.add(rs.getString(1));
}
}
System.out.format("%d new records in authorityUpdate.\n",identifiers.size());
Expand Down Expand Up @@ -260,11 +294,11 @@ protected static void removeExistingAuthorityRecord(Connection headings, MarcRec
PreparedStatement removeFromAuthority = headings.prepareStatement("DELETE FROM authority WHERE id = ?");) {

AuthorityData a = parseMarcRecord(record);
if (a == null) return;

Integer authorityId = getAuthorityId(headings, a);
if (authorityId == null) {
// a new record
if (authorityId == null || authorityId < 0)
return;
}

removeReference(headings, authorityId);

Expand Down Expand Up @@ -296,7 +330,7 @@ protected static void setUpDatabase(Connection headings) throws SQLException {
+ "`heading_id` int(10) unsigned NOT NULL, "
+ "`authority_id` int(10) unsigned NOT NULL, "
+ "`note` text NOT NULL, "
+ "KEY (`heading_id`)) "
+ "KEY (`heading_id`), "
+ "KEY (`authority_id`)) "
+ "ENGINE=MyISAM DEFAULT CHARSET=utf8");

Expand Down Expand Up @@ -366,7 +400,7 @@ protected static void setUpDatabase(Connection headings) throws SQLException {
+ "`heading_id` int(10) unsigned NOT NULL, "
+ "`authority_id` int(10) unsigned NOT NULL, "
+ "`rda` text NOT NULL, "
+ "KEY `heading_id` (`heading_id`)) "
+ "KEY `heading_id` (`heading_id`), "
+ "KEY `authority_id` (`authority_id`)) "
+ "ENGINE=MyISAM DEFAULT CHARSET=utf8");

Expand All @@ -382,7 +416,7 @@ protected static void setUpDatabase(Connection headings) throws SQLException {

stmt.execute("CREATE TABLE `headingsUpdateCursor` ( "
+ "`cursor_name` varchar(25) NOT NULL, "
+ "`current_to_date` date DEFAULT NULL, "
+ "`current_to_date` char(5) DEFAULT NULL, "
+ "PRIMARY KEY (`cursor_name`)) "
+ "ENGINE=MyISAM DEFAULT CHARSET=utf8");
}
Expand Down Expand Up @@ -470,7 +504,7 @@ else if (recordType.equals('c'))
addToRdaData(a.rdaData,f);
} else if (f.tag.startsWith("4")) {
// equivalent values
Relation r = determineRelationship(f);
Relation r = determineRelationship(f, rec.id);
if (r != null) {
if ( a.mainHead == null ) {
System.out.println("Found 4xx relation while main heading is null.");
Expand All @@ -483,7 +517,7 @@ else if (recordType.equals('c'))
}
} else if (f.tag.startsWith("5")) {
// see alsos
Relation r = determineRelationship(f);
Relation r = determineRelationship(f, rec.id);
if (r != null) {
a.expectedNotes.addAll(r.expectedNotes);
r.heading = processHeadingField(f,null);
Expand Down Expand Up @@ -755,13 +789,12 @@ protected static void insertNote(Connection headings, Integer headingId, Integer
}

protected static Integer getAuthorityId(Connection headings, AuthorityData a) throws SQLException {
if (a.lccn == null) return null;
if (a.lccn == null) return NULL_LCCN;
for ( AuthoritySource source : AuthoritySource.values() )
if (source.prefix() != null && a.lccn.startsWith(source.prefix()))
a.source = source;
if (a.source == null) {
System.out.println("Not registering authority. Failed to recognize source: "+a.lccn);
return null;
return UNRECOGNIZED_SOURCE;
}

Integer authorityId = null;
Expand All @@ -785,7 +818,12 @@ protected static void getSetAuthorityId(Connection headings, AuthorityData a) th
Integer authorityId = getAuthorityId(headings, a);

if ( authorityId != null )
System.out.println("Possible duplicate authority ID: "+authorityId);
if ( authorityId == NULL_LCCN )
System.out.println("Null LCCN for native heading: " + a.nativeHeading);
else if ( authorityId == UNRECOGNIZED_SOURCE )
System.out.println("Not registering authority. Failed to recognize source: "+a.lccn);
else
System.out.println("Possible duplicate authority ID: "+authorityId);

else try ( PreparedStatement stmt = headings.prepareStatement(
"INSERT INTO authority"
Expand All @@ -804,7 +842,7 @@ else try ( PreparedStatement stmt = headings.prepareStatement(
if (generatedKeys.next())
authorityId = generatedKeys.getInt(1); }
}
if (authorityId == null) return;
if (authorityId == null || authorityId < 0) return;

try (PreparedStatement pstmt = headings.prepareStatement(
"REPLACE INTO authority2heading (heading_id, authority_id, main_entry) VALUES (?,?,1)")) {
Expand Down Expand Up @@ -929,7 +967,7 @@ protected static void insertRef(Connection headings, int fromId, int toId, int a
}
}

protected static Relation determineRelationship( DataField f ) {
protected static Relation determineRelationship( DataField f, String id ) {
// Is there a subfield w? The relationship note in subfield w
// describes the 4XX or 5XX heading, and must be reversed for the
// from tracing.
Expand All @@ -940,6 +978,11 @@ protected static Relation determineRelationship( DataField f ) {
if (sf.code.equals('w')) {
hasW = true;

if (sf.value == null || sf.value.isEmpty()) {
System.out.println("ERROR: Subfield w with empty value detected " + id);
continue;
}

switch (sf.value.charAt(0)) {
case 'a':
//earlier heading
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@

@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class IndexAuthorityRecordsTest extends DbBaseTest {
// This value is the max moddate from the test data we added to authorityUpdate table.
static final String MAX_MODDATE = "2025-01-15";
static final String STARTING_CURSOR = "2025-01-14";
// This value is the max year-week portion of the updateFile column from the test data we added to the authorityUpdate table.
static final String MAX_YEAR_WEEK = "25.01";
static final String STARTING_CURSOR = "20.01";
static final String IGNORED_UPDATE_FILE = "unname99.01";
static final String NEW_UPDATE_FILE = "unname25.02";
static final String NEW_MAX_YEAR_WEEK = "25.02";

@BeforeClass
public static void setup() throws IOException, SQLException {
Expand Down Expand Up @@ -57,7 +60,7 @@ public void testIndexAllAuthorityRecords() throws SQLException, FileNotFoundExce
Integer referenceId = dbQueryGetInt(referenceRel, authId);
assert referenceId != null;

assert cursor.equalsIgnoreCase(MAX_MODDATE) : "New cursor date should be max moddate from authorityUpdate";
assertEquals(MAX_YEAR_WEEK, cursor, "New cursor should be " + MAX_YEAR_WEEK);
}
}

Expand All @@ -66,10 +69,10 @@ public void testIndexAllAuthorityRecords() throws SQLException, FileNotFoundExce
public void testIndexNewAuthorityRecords() throws SQLException, FileNotFoundException, IOException {
try ( Connection authority = config.getDatabaseConnection("Authority");
Connection headings = config.getDatabaseConnection("Headings") ) {
Set<String> identifiers = IndexAuthorityRecords.getNewIdentifiers(authority, MAX_MODDATE);
Set<String> identifiers = IndexAuthorityRecords.getNewIdentifiers(authority, MAX_YEAR_WEEK);
assertEquals(0, identifiers.size());

identifiers = IndexAuthorityRecords.getNewIdentifiers(authority, "2021-01-15");
identifiers = IndexAuthorityRecords.getNewIdentifiers(authority, STARTING_CURSOR);
assertEquals(2, identifiers.size());

String deletedIdentifier = "sh 85066170";
Expand All @@ -91,11 +94,16 @@ public void testIndexNewAuthorityRecords() throws SQLException, FileNotFoundExce
headingUpdate.setInt(1, existingHeadingIds.get(0));
headingUpdate.executeUpdate();

String cursor = IndexAuthorityRecords.getCursor(headings);
assert cursor.equalsIgnoreCase(STARTING_CURSOR) : "getCursor should subtract one day from previous cursor date";

cursor = IndexAuthorityRecords.indexNewAuthorityRecords(config);
assert cursor.equalsIgnoreCase(MAX_MODDATE) : "New cursor should match the max moddate in authorityUpdate";
/*
* The test data contains a record whose updateFile is unname99.01, which is above the 50.01 upper bound, so it is not picked up initially.
* Change its updateFile to unname25.02 so that it becomes the new maximum for the indexNewAuthorityRecords test.
*/
PreparedStatement changeUpdateFile = authority.prepareStatement("UPDATE authorityUpdate SET updateFile = ? WHERE updateFile = ?");
changeUpdateFile.setString(1, NEW_UPDATE_FILE);
changeUpdateFile.setString(2, IGNORED_UPDATE_FILE);
changeUpdateFile.executeUpdate();
String cursor = IndexAuthorityRecords.indexNewAuthorityRecords(config);
assertEquals(NEW_MAX_YEAR_WEEK, cursor, "New cursor should match the max year week of updateFile in authorityUpdate");

Integer newAuthId = dbQueryGetInt(authByNativeId, "sh 85066169");
assert newAuthId != null;
Expand Down
Loading