From 97c9d4ee01f9bcee31f2c00903cb7a46dba1a2bf Mon Sep 17 00:00:00 2001 From: "David P. Steelman" Date: Mon, 6 Oct 2025 08:49:28 -0400 Subject: [PATCH 1/5] LIBDRUM-991. Add basic tests for ETD Loader Added tests to verify ETD Loader using a single item, and a single item with an embargo. https://umd-dit.atlassian.net/browse/LIBDRUM-991 --- .../edu/umd/lib/dspace/app/EtdLoaderTest.java | 295 ++++++++++++++++++ .../dspace/app/etdadmin_embargoed_item.zip | Bin 0 -> 10154 bytes .../app/etdadmin_upload_test_one_item.zip | Bin 0 -> 9435 bytes 3 files changed, 295 insertions(+) create mode 100644 dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java create mode 100644 dspace/modules/additions/src/test/resources/edu/umd/lib/dspace/app/etdadmin_embargoed_item.zip create mode 100644 dspace/modules/additions/src/test/resources/edu/umd/lib/dspace/app/etdadmin_upload_test_one_item.zip diff --git a/dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java b/dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java new file mode 100644 index 000000000000..240add1e5a9e --- /dev/null +++ b/dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java @@ -0,0 +1,295 @@ +package edu.umd.lib.dspace.app; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.StringContains.containsString; + +import java.io.File; +import java.io.StringWriter; +import java.net.URL; +import java.sql.SQLException; + +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.core.Appender; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.WriterAppender; +import org.apache.logging.log4j.core.config.Configuration; +import org.apache.logging.log4j.core.config.LoggerConfig; +import org.apache.logging.log4j.core.layout.PatternLayout; +import org.dspace.AbstractUnitTest; +import org.dspace.builder.AbstractBuilder; +import 
org.dspace.builder.CollectionBuilder; +import org.dspace.builder.CommunityBuilder; +import org.dspace.content.Collection; +import org.dspace.content.Community; +import org.dspace.content.EtdUnit; +import org.dspace.content.EtdUnitTestUtils; +import org.dspace.content.MetadataSchema; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.EtdUnitService; +import org.dspace.content.service.MetadataFieldService; +import org.dspace.content.service.MetadataSchemaService; +import org.dspace.core.Context; +import org.dspace.eperson.EPerson; +import org.dspace.eperson.Group; +import org.dspace.eperson.factory.EPersonServiceFactory; +import org.dspace.eperson.service.GroupService; +import org.dspace.services.ConfigurationService; +import org.dspace.services.factory.DSpaceServicesFactory; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Tests for the EtdLoader + */ +public class EtdLoaderTest extends AbstractUnitTest { + TestEtdLoaderConfiguration testEtdLoaderConfig = new TestEtdLoaderConfiguration(); + private TestLog4JLogger etdLogger; + + @BeforeClass + public static void initTestEnvironment() { + // Need to initialize AbstractBuilder so services for various builders + // are properly initialized + AbstractBuilder.init(); + } + + /** + * This method will be run before every test as per @Before. It will + * initialize resources required for the tests. 
+ * + * Other methods can be annotated with @Before here or in subclasses + * but no execution order is guaranteed + */ + @Before + @Override + public void init() { + super.init(); + + etdLogger = new TestLog4JLogger("edu.umd.lib.dspace.app.EtdLoader", Level.INFO); + etdLogger.setUp(); + + testEtdLoaderConfig.initDspaceForEtdLoader(context); + + // Kludge to reset static counts in EtdLoader + EtdLoader.lEmbargo = 0; + EtdLoader.lRead = 0; + EtdLoader.lWritten = 0; + } + + @After + @Override + public void destroy() { + etdLogger.tearDown(); + super.destroy(); + } + + @AfterClass + public static void destroyTestEnvironment() throws SQLException { + // Unload DSpace services + AbstractBuilder.destroy(); + } + + @Test + public void testMainOneItem() throws Exception { + testEtdLoaderConfig.setEtdLoaderScriptProperties( + "/edu/umd/lib/dspace/app/etdadmin_upload_test_one_item.zip", eperson); + + String[] args = new String[0]; + + EtdLoader.main(args); + + String logOutput = etdLogger.getLog(); + assertThat(logOutput, containsString("Records written: 1")); + assertThat(logOutput, containsString("Embargoes: 0")); + } + + @Test + public void testMainEmbargoedItem() throws Exception { + testEtdLoaderConfig.setEtdLoaderScriptProperties( + "/edu/umd/lib/dspace/app/etdadmin_embargoed_item.zip", eperson); + + String[] args = new String[0]; + + EtdLoader.main(args); + + String logOutput = etdLogger.getLog(); + assertThat(logOutput, containsString("Records written: 1")); + assertThat(logOutput, containsString("Embargoes: 1")); + assertThat(logOutput, containsString("Embargoed until Tue Jun 26 00:00:00 IST 3027")); + } +} + +/** + * Provides setup/cleanup for the DSpace configuration needed to run the + * EtdLoader class. 
+ */ +class TestEtdLoaderConfiguration { + private final static ConfigurationService configurationService = DSpaceServicesFactory.getInstance() + .getConfigurationService(); + + private final static EtdUnitService etdUnitService = ContentServiceFactory.getInstance().getEtdUnitService(); + + private final static GroupService groupService = EPersonServiceFactory.getInstance().getGroupService(); + + private final static MetadataSchemaService metadataSchemaService = ContentServiceFactory.getInstance() + .getMetadataSchemaService(); + + private final static MetadataFieldService metadataFieldService = ContentServiceFactory.getInstance() + .getMetadataFieldService(); + + private Group etdEmbargoGroup; + private Community testCommunity; + private Collection testCollection; + private EtdUnit etdUnit; + + /** + * Sets up the ETD group, metadata field entries, community, collection, and + * ETD Unit needed for the ETD Loader. + * + * This method is typically called from an @Before test method. + * + * @param context the DSpace context + */ + public void initDspaceForEtdLoader(Context context) { + context.turnOffAuthorisationSystem(); + try { + if (groupService.findByName(context, "ETD Embargo") == null) { + etdEmbargoGroup = groupService.create(context); + groupService.setName(etdEmbargoGroup, "ETD Embargo"); + groupService.update(context, etdEmbargoGroup); + } + + addMetadataField(context, "dc", "contributor", "department"); + addMetadataField(context, "dc", "contributor", "publisher"); + addMetadataField(context, "dc", "subject", "pqcontrolled"); + addMetadataField(context, "dc", "subject", "pquncontrolled"); + + testCommunity = CommunityBuilder.createCommunity(context) + .withName("ETD Test Community") + .build(); + testCollection = CollectionBuilder.createCollection(context, testCommunity) + .withName("ETD Test Collection") + .build(); + // testPerson = EPersonBuilder.createEPerson(context) + // .withEmail("test@test.com") + // .withPassword("test") + // .build(); + 
etdUnit = etdUnitService.findByName(context, "ETD Test Unit"); + if (etdUnit == null) { + etdUnit = EtdUnitTestUtils.createEtdUnit(context, "ETD Test Unit", false); + } + etdUnitService.addCollection(context, etdUnit, testCollection); + + etdUnit = context.reloadEntity(etdUnit); + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + context.restoreAuthSystemState(); + } + } + + /** + * Sets the properties provided to the EtdLoader by the "load-etd" script + * and configuration properties. + * + * This method is typically called from within an @Test method + * @param etdZipFile the file path to the ETD Zip file to load + */ + public void setEtdLoaderScriptProperties(String etdZipFile, EPerson eperson) throws Exception { + URL zipFileResourceUrl = getClass().getResource(etdZipFile); + File zipFile = new File(zipFileResourceUrl.toURI()); + + System.setProperty("etdloader.zipfile", zipFile.getCanonicalPath()); + configurationService.setProperty("drum.etdloader.eperson", eperson.getEmail()); + configurationService.setProperty("drum.etdloader.collection", testCollection.getID().toString()); + } + + protected void addMetadataField(Context context, String metadataSchemaName, String element, String qualifier) + throws Exception { + MetadataSchema metadataSchema = metadataSchemaService.find(context, metadataSchemaName); + if (metadataFieldService.findByElement(context, metadataSchemaName, element, qualifier) == null) { + metadataFieldService.create(context, metadataSchema, element, qualifier, null); + } + } +} + +/** + * Replaces the logger for the given class, enabling the log output to be + * examined. 
+ */ +class TestLog4JLogger { + private String loggerName; + private Level logLevel; + private LoggerContext logContext; + private Configuration config; + private StringWriter logOutput; + private Appender appender; + + /** + * Creates a TestLog4JLogger instance + * @param loggerName the name of the logger (typically a class name) of + * the logger to replace + * @param logLevel the Level to log at (Level.INFO, Level.DEBUG, etc.) + */ + public TestLog4JLogger(String loggerName, Level logLevel) { + this.loggerName = loggerName; + this.logLevel = logLevel; + } + + /** + * Sets up the logger. Should be called by an @Before method in the test + * (i.e., the JUnit "setUp" method, or equivalent). + */ + public void setUp() { + logContext = LoggerContext.getContext(false); + config = logContext.getConfiguration(); + + logOutput = new StringWriter(); + + PatternLayout layout = PatternLayout.newBuilder() + .withPattern("%msg%n") + .build(); + + appender = WriterAppender.newBuilder() + .setName("stringWriterAppender") + .setTarget(logOutput) + .setLayout(layout) + .build(); + + appender.start(); + + logContext.getConfiguration().addAppender(appender); + + + LoggerConfig loggerConfig = LoggerConfig.newBuilder() + .withLevel(logLevel) + .withLoggerName(loggerName) + .withConfig(config).build(); + + loggerConfig.addAppender(appender, null, null); + config.addLogger(loggerName, loggerConfig); + logContext.updateLoggers(); + } + + /** + * Tears down the logger. Should be called by an @After method in the test + * (i.e., the JUnit "tearDown" method, or equivalent). + */ + public void tearDown() { + // Clean up: remove the logger config and stop the appender + config.removeLogger(loggerName); + appender.stop(); + logContext.updateLoggers(); + + } + + /** + * Returns a String containing the messages sent to the log. + * @return a String containing the messages sent to the log. 
+ */ + public String getLog() { + return logOutput.toString(); + } +} \ No newline at end of file diff --git a/dspace/modules/additions/src/test/resources/edu/umd/lib/dspace/app/etdadmin_embargoed_item.zip b/dspace/modules/additions/src/test/resources/edu/umd/lib/dspace/app/etdadmin_embargoed_item.zip new file mode 100644 index 0000000000000000000000000000000000000000..3e0c9babfe4a50a126c165b0dc664e49905b72a1 GIT binary patch literal 10154 zcmai)1xzJMl%{cQTpM@i;_eMJ?(R5(A%zHb@?q+5uo64W6 zN~+FDDycs?C!eAWBornX7#JMbo%au&0^c|iS|~8EWkfJAVlVs;c@&;eK!6b(hRN2Cj%1wBu| zS*3;o0UOyyt-njnr!JB^zqJc{noV8Hzyy?+o^x?L8$q!XXmuUh9d;7~Qa_|SWnKd3 zdIxJCW`a$0oUehbydjeHw`zunj>$GtUed3W^GwbU&?=?qK|oesHpU1W0XphCKPnW( ziMwNdf_rB-b1}R{_tsCmUHcFNfJ6Ia`c=LIu6iw&eQo2at=Z=&bogd{4jB}QloYMz zY@}Qy?a8}`X7`Y=Nsy_rx+ zvif`99#4h|>Z0gB@C18%erT8emR0f40M_ViCvNifI)254>PH%G#(&8~aUy?(7EzmS zI*;UIBbG}4&<+e=@@KrsRKrR{-|cpO7phH;0(rvku>W0DD;Lg_=rHAG9s2sn#RZnh>OG<{u7>KwuV8| zt?a;~5;x-=8eX{o?M8hkRyiJAv7TW8#vbQpeR%PoL{aWPZcj1|<8nast_s)g zJY5w7g*bDA8)%h}oBD9^Ts^RlT}hm3xmLGra(r<)E1pIJxM?C1!TWNGt~%244LAA- z)pnx==ox7l9SPj3=V~9B?xU?$c*m(uwOmQt(_kJpfw zZUp-9L?dtQ_6pLMdb!!}V2c0Sf$;xvAVR$B^%qhwuvRWGFuecKfdCGs=Ksrl!sfOu za{keOF`o%9T`shbLx-cC87hN6RI=XdWd>kTz0s;tGM+D?6@}XCiCad~VBnn@-c3p;ehdLy?(6j2ruBmJkV#m{(7qV;+y8u4P;s zbx9`Mna^!WZT5Da@(=+*9K0~>A#9H?T9$%)c$>B7$<>A=>no~YEva!+cFphUr!%Nx zwuPR;dS>C?XtNFo=LIV0Gp6?4-_s>5U9;`UV|U_wPv zq;@BpKinS|oRuxnc{>q;>Vun)?=CPmQ_z=%emlbDpDM zA?Y(d_fWPEV(kG-I{7V+2DRr>Q|5Dmezl%MN{rUcQp&$&YKEomn6DNQm2zc09jq3t)R%nxId#8c~^soKO?`0a8vL148 z-7?9+LgH_sCmFGkfJb9AvNnk5u=TN2T9q!(rgm2WBb1JUu>>4ZW%@%=h_ysllP}0x zqB}rMUpzF?I!g52V3RiwGJ1+@G0LPCVtpQ*_P@8rjWqRMa!PRNt@x&`!Dd;DCeK8M zU7Z{m*INn+bJi~AYtHB2Ef&Xzh8Fck` z6RG)_61ZdAvcbWpAB4=)rmbvkeOP_}Ja+wnEv(KxwM+$0YlLeBwJf5-ti5KoGcR0x z!mM0-hdjF?B^Ul>wj~X@2iq9KZ=`;dD}_I!9LJEa z9oM@9&Vs#hw`i;cwXk2Oc-JBtoTWRBmG1KjFDJz5*wCl5gt8RG#$3#`+0>}8x{w85A9oa89;`sOc6WrTLWQivNm9Ibk3nLP) 
zEsVA;oMX*3{mM_TkJfx$Eg~lpE^;>|O$QIck8NFHnd~}(8wjZIC$N^A*DFVJ43O)2EDZplo@~Z>`>`Aa`jrKWi>lX+OdGFH@CGPAJLy*uYlHZaTwCPzTb1L>%tE^wx@RbwU6}Si)(uGx z?|x5@V)dfQDYq1=tb(T6JzvU3(8YhTUK8<1LukZV`Oq{+CSLbAd=>a>7YrYF7>Y%d zqMxzM;hC9VHf=~--iQ~@BxFxS9&%OTh~`cGe3`j|S=1~l3;hkHPhV;si)?su22&%2 zfI>d#I@J|V+u6WFHQ>F|75gUL24toxWO`FN`St)QgB&I|+(%f+NN4JueVQ0_*Fn#L zP|q3{Syw}AT>Cb}Di;Z@=sYXIa-oc}m{}u*$az(|DDP}YbCDl**0#92u~}zmYkM{se@{>e4bz_(qhqm25B<)WAb=-XCOfz_ z2SKP6^xW#$NNWkHX6t;!Qd%)K1Bd=0pEnL;a?ZR`bi9#e*>pMSnS~4goynN1)DbvR zW=E42h)8V1(mk8?$a;`^bsGm?#o#J=X}8L+>+4)$g(z&f4J6xnm^a9TvJ^i>wA()B z+?sR5@m*=)8k*I*Uq3$f0ST5CXCI;&AHc7$Y8);vXbQ`c~-b~=s5h)uWn21(iq_O2;o*sOrnm>ZW63doPPPigB%*- z5|CA5l-dq3*dQ$W;nRP~A@?>D@)g@xjkF&3Tu@1Ng|IgGU^b3fQFhEy9248_Ezh8=rnHHyGxdT?X9-S;;WwdPk6+#z0`!lJWj_5^T>; zS>-0h-K5J>rtQY!eWpqKa!HI->9l8SqPno1s^EsR?*zYj**&T%5BwYyg$gIEA} zR~+`_!RGD7IkJJE_A;Yz-V=Ffp<7^gcw0~{Icm*F+d=F`7MeA*R#EteRANP~3q$Yb zUNh0>*LXz~;@*@O!MD<7=D$8UReMojT$Uq=9D)~V>c{7B$&dxbPHrDtr{b*PFeia*45y@OdIuV%JH=|e=S@)DXn$3Pl zEc?%MM}8i~1fefe+nG=C(w%aeO;=2LzL)`9EVQo8 zC+kOzf>PX^UH(iUW=D9iYx9a^aL0QQJI+i-xW@_%mw;}HNu0QHoWp!ICv0~a&Ekq| z8@9x7e+A?J9h?>TTa4wvw`9@Zj&!Lm?CRmI2ONVh`3?1!aT&z&dRFjsK}1JVQ(15p zGy)^QrH^Z}KABOI8#Q8*uG)mp!12Sxnyp06xK-=Bik=Evx@ zYYMZ>S>q>uvEayxE$bB>Z@2m(NB{K>8ShM_%mCd?6?OajiGSRtz+2S5G3y4Uzzgq8 zYX%H!sRHKX=SS(Vr35BIGlH=(?;=wWl0#iKt%upNtjCRnZNEXG_}bOqcyNo@^r+sN zMO-Cfx7389!lw-=b_UwW@fq=lH+imL_#5xC6Hx_uf*^Var!^%d#TB6(d-F z;u;N@@*RvzEcS}uAy0SbF&2j@xxe3){Y2C81=Pvu9%if37(E6XtpSRMIcJv#uQh}% zsm3UmM`?lzZMip=rIH}_G=}f_4^Xl~6V4$|ms%5wb}CFNC97Rtu=F0zcd<$xfl){? zA1r%r`{$Nk(*O1i`lB|eE(D-EMr50)NctGQEupKb=(o(%QL7TB5ORxiC|hBu)W^3w zG=_L!?$71EGvoKpmvB;5lnI0F3EM{*vu z&jZkK(%>~#M|SV<)!|tMPPG1EX0$%P7c9IE+tc+^h;P0*y$JBJ{yk2PKsp1XS;6zl z#)WSoJny`xunPBE$uAvT`@76Y4NTiX7YZp-Q)6=#noYF=Kwu?o5>zT>Wy(2s`z-6! 
zeEujabQPDU!*0LBDLJ_1W-B<7TX`@-B;8|p) z85ey&){QQ+9#TwZ{Q5(#=6R=RM%Ib$;NDu%(_QV~={fNr$s`24+mh#8?oetuNQ^Bt z0SdY{AhRO>db1iCp7Pnm(n%A{``BDd5bEh&b(+L()cw{`j-B#oxTa)$p`tVGlfCW% zUZ%QyJR+!b0gd$0Hu4tZUSj%s2e9!Y@P9ELG;zZ`K^@V<*YBhdeVtc?MaR_kXWgg@ zI+&GnqU#}_S{%?{BE_MUGC^sQ*dqrx>U!8A*?}g?8{@g@zMIAOi{(?6=~Iwk!XCF# zs=Z7$oL_#FYJztYzyDQ|XmM3Wf5R>EeATB>_^iS$u5HR|z*4s7G0={`%!)e0kS0 ze$M^%Sb%ERrpK)6OBS-g&=Q2M<@pg(<rCbN;*W$-U0gy@~@Zct;j1RT+J5Zub1V}P9biI5Y zePwXe|H`n6)1+aqI=J{-C08(R>hFRR#{2qWI?y4mz(}FMi{aNpg8Z@3U3~Ajs4L0` z4fc~hSB&MMZ--jfR7Ib#1)vF62eM=^3_mSpuEhqTbBZ4yEsF`C;h*0SwHFasG{G_2 zc&LW2>M^wev+lG$RE0=-osYui&6AlNWdQKYZ00p(?(F<>aeRVC^V|Nl&STMJTUT*W z@+3iA)VuQ+L0NhSHCj|`rw>ATk=>9NX_{FiwS#$B@E%Lnzrr3qYXEY3ToX1pR z;R6u+_p80LtV;6c?4OPoYYnkraRI?`w9pU84#9%bLgarcc|j;|f-)eF9i ziI*Y?oeJ7o)s~ud7YR79zLRlxg5ZS>N(GIXY@*83)q5MF*K8awBim*&k}Ik-DX@#8 zaSfa5PayT-iWRrIJ0n9}W^7`0am`U|@rccxd)O2ph6{}gUqu>E1)o<2fsg5xdIyr_>Dl^*@gq zFEE7FD${q*OKg`wgo2HS3>`QK-0?!{!Aobblge?w|D9%2^UVz*{bDuo4s?oezkRii zi05_IpeodbN^BJD)5ayl%3<9{3ypMeWpLY!EDa1Y+o552U9=-LQPWe}SfOs$?V5Gah zsDI}Ju|AQ4r?8Z4LNTGANiZ3MIa5=@ZLPsqgh`O!;LU(UijC+_C5mtP~v&#KGh8PqmGSaKS2{=p?5xBOnrg*UpieG@3s!WcyEr9%QQ7P++Il}<`z7X#ixBtUnr_FdeF&Oyhc}rCa-oTwwAM4m zH8(_gxJ#^M2~%6Ko=Y_+tdB)EE)HL^!|JP`z1YgwEo;EcCEfT4P-n@b<&W8BaxeFH zOdwR^6f^IxNdWgqOiJeNYvB|;skQoAMKRj?R=$c9%eIDl?IrFizP2Ccmq+n?+;xW9 zlu9#}i`;k>ylr5CLCAJ)rJ>|$iXQ8RsVs^40$OTQ$a!tE^rb*ChFp3xh}rn?RxOIz zMK^jR)1n=IdnhyHl`n0fgc4PqZ&Mt~BQREz@3kiCgvr@qe+}x}$~zQ+mEX37$@4b0 zAs-)|=Tj}A_u>@%Imf4^Fw$0--X-He1kg5#EcVn4XPKKj2E8P6?Oe9W)Rk3*`M%V> z1R5Xu!gxiGhoY2edqb{3MrN_2dtND%x3GwMkw*NF+8O5$pm29{f5JVdNUP?7g9LcX zj?Vqj%R<(9ap;w^b|8^De|2%~l)r#kK94)mMsgi(LMdH(fs%^xk&n%tx@?YB{krh0 z)cY#Z5&Y3azS=UVf63t!YtQNQ@yPzHTky;*T?cVYoryallce_pwA zdONCf^)e4l5|vPgb3FWOz8-wvGJFVCL%D8zClP?Ux0D5=6hS}^w$N;L5!0f+MAX?` z{-)W?jotmpiC4Ht&oJRmd^Lt0A$f3>;7*)yiY~Hv`mo`h-!!RVZ8M+Gzza5r>Q?Vw zJC%>_Snc+7=cFpv|8G2I;z)%?Z$j@ZjY~u*lQfQy}(U*z=D!z7Ya>3 zs4D>i;H=T3k0vjQld%BLIr7SwSA^L&iIVs_6b@rBss%6e=L%7e`t5d!HH%{<%e)7_ 
z+?iYj{TEFQgnd$KQN`?m+-G6n!+!b`lwth-l)X-+o%{8qWVmG+5(L?f^|Zoe#mUD) zp486N;K@o*DOn^TTGtK)4>EqItESk^rhb9Lm90u68cZVl9_lE-=b^heiSZZ8s4#!j z_VLmvo82}q>*x-yV1=3^g!oB|uI}$;K|l9BN!yyNbLt4G zcnm5v&ufCgZm8rcU%#i1O27aIogOKWem_lRMjsBaX=SvoX-@ZaV0Hc=ci|UwdU~xW z>b$OV$YO~quxjSixji?O3R7M@@S6w}SwPMZ0%_+sRyN$?St-Ipej)slIhH}suCF(X z*Bsut%j8*BvpHGqYa)GRU2IyxJybJvV*mv;)Qr`UO>>0LpSZabTfzSkH@jG+Mb^qw z53V0!M9Y3wA?tf5HA7Pe5HK3)?f8aM%7kq);0qLl_&wO|sJ8bs{-92nmEN})FmW)U zR3gf#f@3Zh#|}%QP<8kOB;!x_#I~p!v4KCD3Aro zXe-p9o%df7XB?(BKOs==+#v;-3@=@(bg}(iVQrR?C_P6IH+Rtrx;19EQo)4biNSSb zwJOTst7P*rejqDFXX>>31#s)1^x%IDax^1|sX!;*e>6I1IE>HHF5T-g=8N7?&*^v& zFfJ(kI68-2a3JO<5?GVJWN>M;-^S)@DA?}cIaRJ(Z+`;FDX;QVw>@_pFFO}W!^&J< zuhg?=G_Fi%mb$Gsaw6O-c3C+9C+zI5zUQ2=Fnng;vm>KFyiul@CmCUYsr)RNm+yij z@WZQ|TD=1;7$fqc&P7P_Y8>Hl5Va~{auDtNKRri5N6q_ED6?qZ)D-bUW>pWpO5TUf zs^tOc6=<`%JXFlOsPmZ3S8AVz->yetYGL7ebXBa|)(VdY)RTSLeST9m=5`F;j#BK5 z{$e}TTYgBbU9T0uiC>RDnL0?5$bCJk9kp?YuXCP>RhboPsWq-Nu9PWv_QzK?ycX}T z7oah%amBX8yLYfGzNkK%rr_XZy$NH>L`rp2=avzQ-rrXRCA(2$P%R@?3pC{oylEK( z4N?%S)UdkA+jJSRajh3!n20ihV9&gNb=fW=Z;deCXDZmpx}inh6uc23UPb8$wL1M( zA4ksMf$|$y(t0Ks8^3!jN%ttts~iY_-Jy|Y)5TK&6lIB&t_62Uys=REDNOQLZk|x7 zQdi+t0PMi5<0R(2UdWf7g5wNZCx3TI1cg0mr^Q}XH^s|%v6-*DG5uvMqvvJz`X>0` zEGS?=2a0ERB+7W{A`6z_3J31plB}11QldObBs)jmZFukddN(7Ahmw!HES<;()jh|$ zdekXF4K9x4T`+{asdzk&1)8F_35yR@UkjC<`gS3NQJKo)K7y!d7J=yiAuDb#J0M&M2kdV+bW{Mo$aoVJ66hmY@?j-PVC6bb~wK4R7cZ?+Q>-3*~m`@ZcHpZRZU3` zHdo8u6NB{84-GWKOQYGD9z=Wf1<@$F3OP{@>YI&5JcAxCIhk9Q>Q#}A1G%HSSev+= z(p=cK8|a(ZR?uJ zQUyr$Gn4~tH@9~=a@dD1%iw?lG=Jtfe>bOZK3nx9Se^J%@G?!5yPHxT{VOM)G4$BQ zB>uvMncVmc;G+F|Tle))*#f)yOF&_$elP<{EX->`^L>YO3PoLX2KW%+L>dx2=w`5r zL}mcDjwcr^4g=~purUnVdOHdd1SIePu zz@7|^zIMZc-r((IB4PE%cubOAgAOG{%>A$9aV-kBj!|wr&}l?o^Y`h~xPE&I()Zma`WY`51(Rw<)Y!gnh8zdQ zRRNEbH(@b=DG=wTydm`J##kr06ug11XF+G{oMMbIg=1|7UUk`nk(#@V@pmm%d%4IV zOYA|gRSCAxD^ryD4}Dbk0mA5wak=eKAuXOgTwtFKyXQ+%0fnfTU@V=)nd8l#ba$5A z15Q$0OYlnb#Z=CYzDM{erY)A1epU&2lr=IScu*;2jTD 
zBV{yo;BzN2+k!m*a5q}Krx-n-+6xJdrFH9r3+z0n${B_!U|Z+lzO0L_IgSzu|6Tw) z?dhx3jU=I`0wbZj4C%zsSonI-?|{xQVzrL`t6A3picD$_O_ZAkQf~5zB}7+fn42$& zeD2#fv4k0nNHK^#LuS;U07@yTu{z+_B+9npGZ&8(CLSE+1ktD_m>-=!$*Xlqz_;r# zySCsvSihQ-p5-OG{@g9@y85!VB!by!jxfwASr@l5ZaPg2o?$jmo9vnwjc}=8* z@v9-H<-+VsN^?;n8k6_f!mITU$d~i=lFN z!GiMVEKqR~f_@B*q3??saG?)NY{riI8}4Vteq}SYmj=K(#~^JdQw)qeZT)#e3EPT5 zH-Jq1GxRrT6p**;V zt_CG5hIfy%C$yBR8uJH=j0aUK0_yhfVqRx(fQXRzdyl!u_H55gKP*sw8sa^VVbn=E z3e-rh@fV!vip*NJH?hZaK0oipA8VK{kywic7Xc?cV5m6KZAvtJZu#cIR`5E@X zKFQz0dhf#FvHL6f5hcmr%AaZQOX^Ev?W-Yq#{-l}wnB0oPxgL#^@aG#mFSQ6ku@q5 z`zZ&?`}sw6t5D>R@lrO|6ZyVyyd(B$d2dhl#r}Ve`Ss5Jv2$bf*@lH%+u5zS&J*si zf6i?{T*fL^$Ti$NXNKYflZ1tP3o|<92I~QvJ|f;qbO zZSWEsCXgVpF<&!TfMz0g4YFzXYz0uh5pW0h`ahN$J*GmxSBZ@7gxd@E8b4e=L#h}A zjtBFg-V$C7{xxi7zBCQ}GKHOgA!0ng;wSmMci3J?wSWPGoC?@6t_Rb6Cy9Kt92g>OOD{z@pz5K$+bn_9gRYEr}pC(!ECBk@Xv}1vjA9hV{2$p)hV1kOFYG zLc05J1qg8}V*n2w5YDLV%9$X*EREo%t?V8>8eW1)myspac`c;h!RH`)f?EAWoR$2C z?+Is<)s@thz=QWBnQ`HvgV8K*@qhXapd2>jf9il4h@sA2?VGaTX2c8o_#-b< z^xTY17)ss7gZeZx-hcY~i~4yao8_Ade!IO0O#aT?AQ&^5OtFzn5e^|f;fIJF?#>5x zEl(>&*y{_sj>?Vsh;@Pi6C5h;C%G6N&`h1j0)+(jB_a$(0Dy;GhwQmaki>m??yhYE z|4InX_umsiyUz1Kb&5z5y340wKgZ#rx&O5-(l4|DNv;StSjUff`-=!YCg`(2SgO!( zk94=^S)A;1m+t*{}C}Uq|a|OSgdzf_fJT$)xyY2A`=tdeNAo; z$Cfbc0ph!MGAst=ZEDI-+5trbNt!5KZVN0*%pXL^fn+ITvRIy!(3r|1*w_Bi1e+4K z?vit8)@QQ!F3>@kd(w1Cx6*KCo>JW3s;djD!NT+BB*iHyJ+W{h3cfwBH}#@Q8ce{D zMG6;!2L+9<(*=CND$0OE5JCP=KpXBqpc@z%_&+KR{CD;L#oM6&6L0%3bjkmn2GMUxMYo#fOIZ_ddiwW9XlGhU(wle*piNxJ3W} literal 0 HcmV?d00001 diff --git a/dspace/modules/additions/src/test/resources/edu/umd/lib/dspace/app/etdadmin_upload_test_one_item.zip b/dspace/modules/additions/src/test/resources/edu/umd/lib/dspace/app/etdadmin_upload_test_one_item.zip new file mode 100644 index 0000000000000000000000000000000000000000..ff0d5662ac86323c5b3559391bb413eb0f7ebb06 GIT binary patch literal 9435 zcmai)RZJ!9(xrh7H16)&xVyVI?(Xhv+#MQd+-cmU*|@vAL*wr5F8_DtBr}saIhnkv 
zH+509*4;{7JW6r^NDMGAFc>gzTT$JhbV81M02r7M0vH%E7y+1wo2!+hi=msnnISU^ z3zvc+3o9EthoQKLnh2w(y`8!mEEoW%Y10b#pK|kr2Lp%fz$o&DERZt*>(ta$T<6B{ zKho6lVoWj%OqGad43-epS(XLRU8wHM{Ek_qm|8^=dp+~<9bbmEe+o;H%-p%-*=l{( zn>aZ7+a$si3P09XNv%(bN~Yf{ySk`3`*8@`ySuC3ik&|Oy1RP$bg%52EH|l9J1-!w zg+~!a9#*Ag3YRzQPS~abU}po@l?@IB4qYaodOMc&CBDR0P({O0kzIzlukYdDl_`da zw7eB`Cx%C0xD$|?2S}070e`O>HM(N}PTzH{uYbZ=A5=6K>FA^){!9z6i0V?PF~5e5 z(d*^?32zN3hvsoYn;flDQmCj%)^@=I{k`GU&edMJ_AH4WPedn0stK5xi8VWUg)FC@ zm2?r+tC_zMt<7^;uomIE9)HQ}2`h_VnJ*l#)603PfJ+9HGGc|$+t=kM`A+0;sZKz1 zi{p0_R?ku~l*oVYwbte7PSRX|MAXKtYOIj_-oc!;x{|Fd)19D@Vn&d|Y%nutGDT{y zkOq906u|FHj0{%YpeP&WVA&DrmuO^=KKj_QJd{*@VDE zGj9KpBNsj%N=a*vO=Ja-;Ke_qP%6rV@kk}2T#KYrXr)Q=lR_!jl@q>`NMjcN433=3 zB9T@`DUw4idWxMBKRHdydm@O-;p=`o%5a>@l-cwEb)IfcPa#Ybg}NoH4UnS21YTDz zXdZx~3^%?B2N{v{j)9XfAkGc!*weTRpbR*p?>2q(Xoy4XyuffDc<=cJfz|C)pp{*4^aHSbdr1FOgQZrlbrnq>fHHIcbvuYDe}wDrp==Ow z5%pDmBJ+H@x=Clz3$qO<_VIsL2JiD~w5E%~-stHi^9CN|ZZ(f!gTvGq&_=rRfwO-B`nwTLDAwSoN|_bDl1uRNNH$hI4T)H=81v7QfWoIY1yyWpE=wOP`x<#qbqii>Z8eRH213a1 zQVe0-oSpkMJ0r3re1s5xijqyK*EcWGkw&XjJg`4G*3j}__ZYCApD;675QM#;cjxcM zeNXmnPi`l4t_j-+`2O=PouGwIkzndYT-&rq5FDwAk=k@mbNYVmt9mY^7?OCMN z+0NHil)G^+g;T7*K3rjKxw08Og%%14zNOh#*0Y*RFFS8b%aqp8F^_4(zLRuklwT7h z&mNh%Jdc$9?C-+(>(h5+e>+d6{SS4%COPp%6++8boph#AEWWGAhEx%_J|f8Ao>fLjjv17m>I6lXdc1!`8rkL zybHRdva3>!USB`f6W3O-wr&_D=AN_P?#&bEdstcsfrKksX7>)ZLo#&hhW?g;K-NhOEVf#vG)sMfIi7}V-(eTCI z*itk@cnoBF$~22znzR%RY*NZSj9Y;B)=5dQEV-dvhuAtC%wHItm+i~uv;N$)B*@&= z>SdqCLn<(2Ljz2XsMmHffkjQPMkbOxQJSrVC?nLtK@T>W-(DOHWl-erL9(mre^xc` zz&weYU2}S8Q4-s}W=!2%YJ-Zb7f=2yt~R)Q&WKt6_)*e$b?(c$$AJul!TBbUTxn&~ z88z+S3Lh};r>F9eVxP!0@HfwQ9XQdQu?S@XKIktqB_8 z@WnilU<=3kB-ov4={!^CiQ6uyVIxAIF72234SFZW6H{QO}Ls}Ad^sD!{ zBoQdtRvcq6@_5LhuY#H*EE>xuCZlRGR+T8k<#xDWi1mC9kDJt=U@rO2Lu4=GbIIdsY>`AA&IW=s<55Xj7UH|Y# zL76B6JXJFRcH1bYYt8e~JMP2u#~``fFLZlo^xVyk?R2{+t^=A+pI9OBipWbFw@#eawicdXpXfd z{6=Q?<{Q~@A==v$#5XqeK+4mpSPVYrS#7w2b7X3r-;bWhzk5~TAuRdvuFj+BI|?h_ z?(K=gC!9de?54D$?=tDt1@6*@MGs|aJ>VH!O|fr~Yx44zYw7M{3M>ml5S>jP!^G&8 
zll062M$(L|uP)21`xamd+x+gYdF2)$)T#5`!8xD8AMS2r#D~g*+fotJcEtkDvtgA% zLQ`T%P(t@QSVx+AX*GNe7Rm=8R3^bB2Vx>>mc_!|JoX;^;UWmfOQt z^yEcLN?gX+SZBRL-am>@wZ0Y>(uo8Qc=DgGF25tvvwbc1lU@flDPoKFE2#M6?je&; z98ay(ASVY>rgKdX;*cf>X0~7yi$G0l`^MEEArC>GSTPPZjd(Zq-F!w?nj4ysj{~rB z=yv2zHQ{uEpdfZQk}lCO2FGmGBFI!#;5UESAe2q z-^_7K8)3!j3UBG-q6Y=h{4NoWIkn`aO(UW)mRA<>5 z8+9_1O6wH^&m>I&JlMvj;&Q>(~yy;q5@y7sodN z@TsT?`c4)ESInUYH4Q!skr-FSYm*1}w$~dxiE8cR9Jfw(*v5p5HUHPif|IQqySi<} z>DAPf40yQGH=2+m&-85=yI8mRGGOJfEW-c>l}MjkK!-im;lr&elZSp&-@nq2_{Df| zE^?cBj`ALF&*m(;Bx+`kz~9SW$gl87F~mU%v$H$HbFkhu{>u7CUt4gqLUx99b4S1P zmxtiy@KZPOs0Wmgxz{E`Zeg}fVqz6(Q)W}J*PFO0)cU-e97v5PWLsR#>=G&Jyvv7+ zd!9P|P4=}gh|4VgvH4h{zTo$fRToG5 zUGs*+14y)J6k0X=P4PCjnyZ2C@j~;jW|jgU_9FQ&ZP4QD`^$+MvE|_`7M*Q6o&wX9 zLP6@V1Hjab3rJ2{d@%TErnB{@(sU)xf)u_K*ycj_)`2Y<)w(Hb{@UnTLm}v5xJrAC z&4-%ZkXJgG`w3|WIw`?h@1b%>vv5Gsr6ekY0Hs258C&T3TArfRYsD~_nnS`9!oyfA-x+^??(3>pEp1F7T_c?1N zjqjAZ?c--}gc~X}H7#@BHA^M1f}q9)L|XXUO3G8E(hU9f@4cI1_cpB*vdFV&esc~{ z^-W0TIz^dDVNfpsJQbsJWVaj964B3tEg;2OHt$x3w)kxl+E?rxezEX9wTO5*azMt8 zT@?fl=CZZOwwN(RG`FP&OmwK#qR#mRbXMig=swzyoevx{ER6M}K0QK?_|!*pz;@1c zQy0$O zJxUa&rx%38_VH}0PwFK&qe0v{D@K%!LbYAgXdFr)b{hV+63&-z8dirou}6IMgpA;4(_ z^=WGfT`IWf90Z>j$BWq$ddbMSfL>c~O2Y#s7SM(1-v!}3EcVLdh=r=HJYVdK-2O0i zv*?&#`_hdcmn2y3;P+e5s}T#3jc~hKh9=!)Dq)J24UAy-Kci9P3L6$>m7q_KCn3j#hvSWQ(XMsPWaey5}<#l?6_@ra9?rmXUDiI z=EN}NFH`JQ3{syHxc2Zc#Z+ffM&|fHkLNcm@U}U`-o2$=)$LiV3a0k^Feobe5G!6w?$cU%8Xgl+RTjE7 zl%i`EWFo!A6|Gax9#?cC3!VZ+wG)Y{;xc{MJHj+hHUm)>8gK zVQ|UImT-i37Hu6qCr3B22HL&iJ3qOLg2$K{_xrd-d0QXcPOHNAjY!lxB##NH`isF1 zl1kB!#1t1}t0=xB$K}})usS^QO>^Pm${vCLNoKYmrjHQudu-2-^wKD>gXyq;kj6RN z|3g$JblEd;^p$d}5Wf018btFL1H`2y|LP&Hj`QBx9isA;N>7eh;sF>dr`8}?Qu5+P zM$T}ckG)Zq^RTgqrP^wR&l>V|mmssjm)a8Sa4D5l{+7mf%_~CbW1pMQk*&9$HHtv5 z0%zSIybd3e*C=kGJLs~qCWk7I51`MCh~%2)qK%T~wsRMX9+=(?i&6bmt99KEg6Uc( zVRyiR8bS1hXGmh|`~Wv}f$Sn*fPTu7A(YSi8FYp?MG@#4qcH@?+70r6OCSkXiz zR&G1q86WaEYhyqp1VpZIfy0h5NL^Jl!A_?MB#xeC7{W)OIX|?wQ2lJ^aEgIG=Hqg2 
zbM@mOiC@bk8m>x225_PY%lLdsijp?fsKfOgrq`1kDn(^?>DS4w!Nr}WOL1-scxxqv z(pW_!iiwrLwbPP9s2pGNjkK>wIQNpb+K;n3;SP;#n}_G;E`EZy(Wyd^*HORyA4`77 zUF2385Q{oHB>M2{&S}9AY@7DUx2sBls7+R+Nox%;;ze&IX^h0f9r$K@E=)Ny*ygH9 zpiV4*d~2?p2t%O0iE|{xL6tfH6>ByxZ=`B=j1PS+5R&u(LJVPxc zgQe`s>h3kAyBJ&gRj2DqN!XGzfvrtch@O964(6CSU6OAnzfOzQxRd=?-P@Bz+a z7Z3=pYAvQ=(bm;^kZ|K#%8u&ZwGOm)1B9fY5z?A2boUP?wiQZ*AYVE_#Tmb=FBGVp zc}T4si>QE~T~LR4m}1IgD_fm(5*m9SeeWaI$l(xh{Up^K{M90o4@8v$-Sc;*>bb6b zxuj9^E+JOWfX?KZ6l91Xl8~N&6*mG^&N7}Dj=!Kf1&ijWF)4EyR?9HX0v-#78%K2W zY9;Uq@rgg2(yC_-;(!c!!KM{>t{KUEjPB** ztCStMK5t!q5xkt2xPJHR@L#;|RQr?iy;W9f{iIo;cb^(%Mgga zb-jGUx!3qiHv}CS&X6Sj_KCLp38?9?&<LLIxxL5bJVc^B1&`kGTj|7s^J&>qcx+yJvD`b`V4rHN zcM@cH@!%?|uGrxoUZlYuG^Rg`aUwN&*I3~k%bVz}ce>ka->qMDwc?2#z8Ok0p-B$^0O@DH67+$sYGlcH3W9Mfm)FB zp#GwUxDwgmFT;#nt*5H`OUDNOgz(DI^}=7$c=ssUjFxkoIf))CX`#6<$LwfYurN+n zh9h2GZiO*!+`6_(OTfy`fWmwsyW_?mHzuW31JA`zHe|T`y_MC*fzbCM$8N;dQCsmp z0=7>s-Jm2k+d{yAkvBdLrE*sic;Uv zLNjg{h9czd>*`IpZzQHN_`K1dr6il1k&D$+B0sO6ZP0sN*hxhWmpD>3k=M?-qc^MU zMb7HkFCN7o_Zk<%>*sh!?tCU)b>l3eFS-cwv(GvB#UEFHuN$6@6twrClzQ>~$d3(n z`S>kS^144-aYwy%7UVknW_o3nRc;+vh>vjhWM)P{0LL;%=M4?RS$Du~&We`p3?_$Q z4GAwKlMkrzG%J;cKNAH_RM-Uq- z_mMCDFI$DUa8 z-MsfV;;=crmnC#rr}?5^I&j(Zrs~_9*2XujN218z4Jo|lGaSYYoqbAskusB0+p~*= zj3=Yjk_TUEBu^#b1yc=zCm0-o$#r@=d@8-GM7ufcU8Egm0yoc;R3}p-$Z?nfbp7lvgs$3?~G)hN~%z&VGL_y)#Y7LClp1}R6wOLxDk(TU? zFF14`w2T6)2-4XmL-e~nLI|k9nDbf;zB*hSYKaXg8Cq8(X$-e76k%!tgwT-g04wiR zbdkj`DNCehs4C$FBvxVQP^{EI;k5+=nEBkhm-XEGR@_1|866rtYM}DQhb5-R&6LsOGdW~2xCWzb3s)}E7 zfuui!#lHBV=3I!xV;ae0AsYHtR`<*yvBnKxz%FW8Atap`YOs`1EiFOr8KLvtzMDO- zb%h*obR<;p@sGkj-00cm+JQt<3ZAU1Vv=BDf1n~ty5<*1z_r1YGD90h&S=WtYw}~w zAkV2W$nYRtJUr5zB4kjcn@C{b!XSe3NvPF++mI?e4yZB{?RwUcFbq85e61MLrUo{! 
z3`K3rCZM7`qozNr8u4qJl%l}e2D3%-f(J|YxJTuO7a+|p2n@98>`B+Dc_uooa&kT{ zannvv#G|3W!YHX37$_>s%ib2TA2SUFw9%F0>`I&2kk+BV4j@+wxitifbgR90yNl@0 zbYJxW#ERU(KNYdn_!`LEuF9;qNKk&8{gQrM*y6UH0ljwP?Io8HYg6OEVMhcG_M!k_ zAQ2P9<;Vtm6Rnt^sh+M<13Ya}YaTgszM`KLQv4#ZpYZHFh zYK-_;^dY>Xm_n963Xrx^c;8^>mBMtflOl#g!Jnok6_U#%JPm{Gmo-@)!A{04oY3<;m$^Ln+&_265BJpNhAK zK8F-QUB)*D7y}ammhMAlk~EYxl)*!GV3={hp+bt47D_1|U`v0Qi+t_S1fxZxob`uAF0jV(~!4x``BC)ogsPbo>agq$iw6Hl3B7JJKX zj22Y#s8_BASJAK8Ar#eMLj-=Ib`Ne&!>2=LzsAe?{5ATTiSf2t|yU)}$g2ZH)H z5A^@AK>xSNf063?vC-9H{zc_*a(1(Kl*FMBQZ}49Y#lOD)0n&ak AqW}N^ literal 0 HcmV?d00001 From bc4f2cf1c0967e2e5b5acfb09dff6ba587dd9480 Mon Sep 17 00:00:00 2001 From: "David P. Steelman" Date: Wed, 8 Oct 2025 07:58:46 -0400 Subject: [PATCH 2/5] LIBDRUM-991. Limit file size process by EtdLoader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Kubernetes, EtdLoader uncompresses files into an ephemeral volume with limited space. If an uncompressed file exceeds the ephemeral volume size, the pod will reboot. To prevent this, added a “drum.etdloader.maxFileSize” configuration property. A Zip file containing a ZipEntry that is larger than this size will not be processed, and a prominent error message will be added to the log. Updated EtdLoaderTest with a test of the “drum.etdloader.maxFileSize” property and error message. 
https://umd-dit.atlassian.net/browse/LIBDRUM-991 --- dspace/config/local.cfg.EXAMPLE | 3 + .../edu/umd/lib/dspace/app/EtdLoader.java | 77 +++++++++++++++++++ .../edu/umd/lib/dspace/app/EtdLoaderTest.java | 23 ++++++ 3 files changed, 103 insertions(+) diff --git a/dspace/config/local.cfg.EXAMPLE b/dspace/config/local.cfg.EXAMPLE index c5549b89193e..1c21a4d032c8 100644 --- a/dspace/config/local.cfg.EXAMPLE +++ b/dspace/config/local.cfg.EXAMPLE @@ -405,6 +405,9 @@ drum.eperson.subscription.limiteperson = drum.etdloader.eperson = load_diss@drum.umd.edu # UUID of "UMD Theses and Dissertations" collection drum.etdloader.collection = ba3ddc3f-7a58-4fd3-bde5-304938050ea2 +# Maximum (uncompressed) size of an entry in an ETD Zip file (in bytes) +# Comment out, or use -1 for unlimited +drum.etdloader.maxFileSize=15032385536 # Environment Banner configuration # Leave blank on production environment diff --git a/dspace/modules/additions/src/main/java/edu/umd/lib/dspace/app/EtdLoader.java b/dspace/modules/additions/src/main/java/edu/umd/lib/dspace/app/EtdLoader.java index 5a5100ffad7a..502bc78d7483 100644 --- a/dspace/modules/additions/src/main/java/edu/umd/lib/dspace/app/EtdLoader.java +++ b/dspace/modules/additions/src/main/java/edu/umd/lib/dspace/app/EtdLoader.java @@ -123,6 +123,12 @@ public class EtdLoader { private static Logger log = org.apache.logging.log4j.LogManager.getLogger(EtdLoader.class); + /** + * Configuration property for setting the maximum file size that can + * be processed. + */ + public static final String MAX_FILE_SIZE_CONFIG_PROP = "drum.etdloader.maxFileSize"; + // Suppress default constructor private EtdLoader() { } @@ -147,6 +153,10 @@ private EtdLoader() { static EPerson etdeperson = null; + // Maximum ZipEntry file size that can be processed. Defaults to -1, which + // is unlimited. 
+ static long maxFileSizeInBytes = -1L; + static SimpleDateFormat format = new SimpleDateFormat("MM/dd/yyyy"); static Pattern pZipEntry = Pattern @@ -211,6 +221,9 @@ public static void main(String args[]) throws Exception { String strCollection = configurationService .getProperty("drum.etdloader.collection"); + String maxFileSizeStr = configurationService + .getProperty(MAX_FILE_SIZE_CONFIG_PROP, "-1"); + log.info("DSpace directory : " + strDspace); log.info("ETD Loaeder Eperson : " + strEPerson); log.info("ETD Loader Collection: " + strCollection); @@ -242,6 +255,19 @@ public static void main(String args[]) throws Exception { + strEPerson); } + if ((maxFileSizeStr == null) || maxFileSizeStr.isBlank()) { + throw new Exception(MAX_FILE_SIZE_CONFIG_PROP + " not set"); + } + try { + maxFileSizeInBytes = Long.parseLong(maxFileSizeStr); + } catch (NumberFormatException nfe) { + throw new Exception( + "%s of '%s' is not parseable as an integer".formatted( + MAX_FILE_SIZE_CONFIG_PROP, maxFileSizeStr + ) + ); + } + // Open the zipfile ZipFile zip = new ZipFile(new File(strZipFile), ZipFile.OPEN_READ); @@ -261,6 +287,8 @@ public static void main(String args[]) throws Exception { } context.complete(); + } catch (ZipEntryTooLarge zetl) { + log.error(zetl.getMessage()); } catch (Exception e) { log.error("Uncaught exception: " + e.getMessage(), e); } finally { @@ -790,6 +818,24 @@ public static Map readItems(ZipFile zip) { Matcher m = pZipEntry.matcher(s[0]); if (m.matches()) { + if (!isFileSizeWithinLimit(ze, maxFileSizeInBytes)) { + long uncompressedSize = ze.getSize(); + String msg = """ + =============================================== + ERROR: Zip file entry too large + + The file '%s' in '%s' + is too large at %d bytes, exceeding the limit + of %d bytes set in the '%s' + configuration property. + Skipping. 
+ =============================================== + """.formatted( + strFileName, zip.getName(), uncompressedSize, + maxFileSizeInBytes, MAX_FILE_SIZE_CONFIG_PROP + ); + throw new ZipEntryTooLarge(msg); + } // Get the item number if (strItem == null) { @@ -818,6 +864,27 @@ public static Map readItems(ZipFile zip) { return map; } + /** + * Returns true if the ZipEntry is less than or equal to the given + * maximum file size limit, false otherwise. + * + * The maximum file size is typically controlled by the + * MAX_FILE_SIZE_CONFIG_PROP configuration parameter. + * + * @param ze the ZipEntry to examine + * @param maxFileSizeInBytes the maximum allowed file size in bytes. Use + * -1 to indicate unlimited file size. + * @return true if the entry is within the limit (or the limit is negative), false otherwise + */ + protected static boolean isFileSizeWithinLimit(ZipEntry ze, long maxFileSizeInBytes) { + // Negative number indicates unlimited file size + if (maxFileSizeInBytes < 0) { + return true; + } + + return ze.getSize() <= maxFileSizeInBytes; + } + /**************************************************** reportCollections */ /** * Report missing mapped collections @@ -888,3 +955,13 @@ public static String toString(Document doc) throws java.io.IOException { } } + +/** + * Exception thrown when the uncompressed size of a ZipEntry in a Zip file + * exceeds the size specified in MAX_FILE_SIZE_CONFIG_PROP. 
+ */ +class ZipEntryTooLarge extends RuntimeException { + public ZipEntryTooLarge(String message) { + super(message); + } +} diff --git a/dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java b/dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java index 240add1e5a9e..f73c2d067d9f 100644 --- a/dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java +++ b/dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java @@ -119,6 +119,22 @@ public void testMainEmbargoedItem() throws Exception { assertThat(logOutput, containsString("Embargoes: 1")); assertThat(logOutput, containsString("Embargoed until Tue Jun 26 00:00:00 IST 3027")); } + + @Test + public void testMainMaxFileSizePropertyHandling() throws Exception { + int maxFileSize = 1000; + testEtdLoaderConfig.setEtdLoaderScriptProperties( + "/edu/umd/lib/dspace/app/etdadmin_upload_test_one_item.zip", + eperson, maxFileSize); + + String[] args = new String[0]; + + EtdLoader.main(args); + String logOutput = etdLogger.getLog(); + assertThat(logOutput, containsString("Records read: 0")); + assertThat(logOutput, containsString("Records written: 0")); + assertThat(logOutput, containsString("ERROR: Zip file entry too large")); + } } /** @@ -204,8 +220,15 @@ public void setEtdLoaderScriptProperties(String etdZipFile, EPerson eperson) thr System.setProperty("etdloader.zipfile", zipFile.getCanonicalPath()); configurationService.setProperty("drum.etdloader.eperson", eperson.getEmail()); configurationService.setProperty("drum.etdloader.collection", testCollection.getID().toString()); + configurationService.setProperty("drum.etdloader.maxFileSize", "-1"); } + public void setEtdLoaderScriptProperties(String etdZipFile, EPerson eperson, int maxFileSize) throws Exception { + setEtdLoaderScriptProperties(etdZipFile, eperson); + configurationService.setProperty("drum.etdloader.maxFileSize", "" + maxFileSize); + } + + protected void 
addMetadataField(Context context, String metadataSchemaName, String element, String qualifier) throws Exception { MetadataSchema metadataSchema = metadataSchemaService.find(context, metadataSchemaName); From ff6c4906c4782a467189429fa73080b01e6ac897 Mon Sep 17 00:00:00 2001 From: "David P. Steelman" Date: Wed, 8 Oct 2025 08:01:52 -0400 Subject: [PATCH 3/5] LIBDRUM-991. Report errors to enclosing script Modified EtdLoader to exit with a non-zero status code when an error occurs in the processing. Modified the "load-etd" script to capture the status code from the EtdLoader and use it as its exit status. Modified the "load-etd-nightly" script to print an error message and continue to the next file (leaving the file with the error in the "incoming" directory, instead of moving it to the "processed" directory), and return an exit status. Modified "script-mail-wrapper" to capture exit status and modify the email "Subject" line to include "- ERROR(S) OCCURRED" on a non-zero exit status. https://umd-dit.atlassian.net/browse/LIBDRUM-991 --- dspace/bin/load-etd | 6 ++++-- dspace/bin/load-etd-nightly | 13 +++++++++++++ dspace/bin/script-mail-wrapper | 18 ++++++++++++++++-- .../java/edu/umd/lib/dspace/app/EtdLoader.java | 11 ++++++++++- 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/dspace/bin/load-etd b/dspace/bin/load-etd index 02034cc99934..81489f103890 100755 --- a/dspace/bin/load-etd +++ b/dspace/bin/load-etd @@ -45,9 +45,11 @@ $ENV{CLASSPATH} .= $classpath_separator.$prev_classpath if ($prev_classpath ne " #print $ENV{JAVA_OPTS}; #print (join ' ',@cmd) . 
"\n"; -system(@cmd); -exit 0; +# Using ">> 8" to recover the actual Java exit status code +$exit_status = system(@cmd) >> 8; + +exit $exit_status; ########################################################## GetCmdLine diff --git a/dspace/bin/load-etd-nightly b/dspace/bin/load-etd-nightly index 89ff63e0b331..9d7fa174800f 100755 --- a/dspace/bin/load-etd-nightly +++ b/dspace/bin/load-etd-nightly @@ -18,6 +18,8 @@ bindir=$(dirname "$0") incomingdir="$datadir/incoming" processeddir="$datadir/processed" +error_occurred=0 + # Check for incoming files if ls "$incomingdir"/etdadmin_upload_*.zip &> /dev/null; then echo "Files found in $incomingdir" @@ -29,8 +31,17 @@ if ls "$incomingdir"/etdadmin_upload_*.zip &> /dev/null; then echo echo "======================================================================" echo "Loading archive file: $incomingdir/$zipfile" + "$bindir/load-etd" -i "$incomingdir/$zipfile" + # If an error occurs, continue with the next item, leaving the file with + # the error in the "incoming" directory. + if [ $? -gt 0 ]; then + echo "Error: Failed to load $zipfile. Continuing with the next file." + error_occurred=1 + continue + fi + # Move archive to the processed directory if [ ! -d "$processeddir" ]; then mkdir -p "$processeddir" @@ -40,3 +51,5 @@ if ls "$incomingdir"/etdadmin_upload_*.zip &> /dev/null; then mv "$incomingdir/$zipfile" "$processeddir" done fi + +exit $error_occurred diff --git a/dspace/bin/script-mail-wrapper b/dspace/bin/script-mail-wrapper index 4d467f49a755..a6935a0c68db 100755 --- a/dspace/bin/script-mail-wrapper +++ b/dspace/bin/script-mail-wrapper @@ -65,10 +65,24 @@ echo LOG_FILE_PATH=\'$LOG_FILE_PATH\' echo SCRIPT_ARGUMENTS=\'$@\' # Call the script being wrapped -$SCRIPT "$@" 2>&1 | tee "$LOG_FILE_PATH" || true + +# Temporary file to store the exit code from the subshell +EXIT_CODE_FILE=$(mktemp) +{ + $SCRIPT "$@" + echo $? 
> "$EXIT_CODE_FILE" +} 2>&1 | tee "$LOG_FILE_PATH" + +SCRIPT_EXIT_CODE=$(cat "$EXIT_CODE_FILE") +rm "$EXIT_CODE_FILE" + +SUBJECT_LIST="$SCRIPT_BASENAME: $SERVER_TYPE" +if [ $SCRIPT_EXIT_CODE -ne 0 ]; then + SUBJECT_LIST="$SCRIPT_BASENAME: $SERVER_TYPE - ERROR(S) OCCURRED" +fi # Mail the log, passing all non-JSON unchanged, and filtering out DEBUG messages jq -R -r '. as $line | try (fromjson | select(."log.level" != "DEBUG") | .message) catch $line' $LOG_FILE_PATH | \ -$MAIL_SCRIPT_DIR/mail -s "$SCRIPT_BASENAME: $SERVER_TYPE" "$EMAIL_ADDRESS" +$MAIL_SCRIPT_DIR/mail -s "$SUBJECT_LIST" "$EMAIL_ADDRESS" echo Done running `basename $0` script diff --git a/dspace/modules/additions/src/main/java/edu/umd/lib/dspace/app/EtdLoader.java b/dspace/modules/additions/src/main/java/edu/umd/lib/dspace/app/EtdLoader.java index 502bc78d7483..dba677ccb559 100644 --- a/dspace/modules/additions/src/main/java/edu/umd/lib/dspace/app/EtdLoader.java +++ b/dspace/modules/additions/src/main/java/edu/umd/lib/dspace/app/EtdLoader.java @@ -205,7 +205,7 @@ private EtdLoader() { */ public static void main(String args[]) throws Exception { - + boolean hasError = false; try { // Properties @@ -289,13 +289,22 @@ public static void main(String args[]) throws Exception { context.complete(); } catch (ZipEntryTooLarge zetl) { log.error(zetl.getMessage()); + hasError = true; } catch (Exception e) { log.error("Uncaught exception: " + e.getMessage(), e); + hasError = true; } finally { log.info("=====================================\n" + "Records read: " + lRead + "\n" + "Records written: " + lWritten + "\n" + "Embargoes: " + lEmbargo); } + + // Exit with a status code of 1 if an error has occurred, to signal to + // the "load-etd" script that the item was not successfully processed. 
+ if (hasError) { + log.error("Exiting with return code of 1"); + System.exit(1); + } } /******************************************************** addBitstreams */ From 2ecb1ee4b9dae9d81a83c48ddddf880454e5cec6 Mon Sep 17 00:00:00 2001 From: "David P. Steelman" Date: Wed, 8 Oct 2025 08:02:20 -0400 Subject: [PATCH 4/5] LIBDRUM-991. Add additional Etd Loader documentation https://umd-dit.atlassian.net/browse/LIBDRUM-991 --- .../docs/DrumEmbargoAndAccessRestrictions.md | 2 +- dspace/docs/DrumEtdLoader.md | 130 ++++++++++++++++++ dspace/docs/DrumFeatures.md | 2 + 3 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 dspace/docs/DrumEtdLoader.md diff --git a/dspace/docs/DrumEmbargoAndAccessRestrictions.md b/dspace/docs/DrumEmbargoAndAccessRestrictions.md index 80ee47f2a3d4..e45eac326414 100644 --- a/dspace/docs/DrumEmbargoAndAccessRestrictions.md +++ b/dspace/docs/DrumEmbargoAndAccessRestrictions.md @@ -94,7 +94,7 @@ system simply relies on those administrators maintaining both policies. When ingesting ETD items from ProQuest, the bitstreams will either have no embargo, or a specific date for lifting the embargo. For embargoed items, the -ETD loaded automatically adds both policies. +ETD loader automatically adds both policies. ### Embargo List diff --git a/dspace/docs/DrumEtdLoader.md b/dspace/docs/DrumEtdLoader.md new file mode 100644 index 000000000000..e3c16cfee6cc --- /dev/null +++ b/dspace/docs/DrumEtdLoader.md @@ -0,0 +1,130 @@ +# DRUM ETD Loader + +## Introduction + +The DRUM ETD Loader is UMD custom functionality for processing files uploaded +from ProQuest into DRUM. + +"ETD" stands for "electronic theses and dissertations". + +## ETD Workflow + +ProQuest periodically uploads Zip files to DRUM via SFTP to a specific +"incoming" directory for processing. ProQuest sends an email to +"" with a list of the ETD files that were delivered +(or failed to deliver). 
+ +Each Zip file contains + +* An XML file containing the metadata for the thesis/dissertation +* One or more PDF files + +The "load-etd-nightly" cron job processes each Zip file in the "incoming" +directory, adding them to DRUM. Successfully processed Zip files are moved to a +"processed" directory so that they are not processed again. + +Upon completion, the "load-etd-nightly" sends an email of the log messages +generated by the cron job. + +If an error occurs when processing a Zip file, the Zip file will be "skipped" +and remain in the "incoming" directory, and will be processed again on the next +cron run. + +## ETD Loader Components + +The ETD Loader functionality consists of: + +* An SFTP server for receiving files from ProQuest +* The "load-etd-nightly"/"load-etd" scripts that load the Zip files +* Java classes in the DSpace "additions" modules +* Angular components in the "umd-lib/dspace-angular" repository supporting + the creation/editing/deletion of "ETD Departments". +* A special "dspace/config/log4j2-etdloader.xml" Log4J configuration for + controlling the log format +* Configuration properties in "local.cfg" + +## Related Documentation + +* [DrumCronTasks.md](DrumCronTasks.md) - contains information about the + "load-etd-nightly" cron job that loads the Zip files received from ProQuest. +* [DrumEmbargoAndAccessRestrictions.md](DrumEmbargoAndAccessRestrictions.md) - + for information on embargo functionality. +* [DrumLogging.md](DrumLogging.md) - contains information pertaining to the ETD + logging functionality and email. +* [DrumTestPlan.md](DrumTestPlan.md) - contains test steps for verifying the + "ETD Departments" CRUD functionality, and SFTP connectivity. +* [dspace/src/main/docker/README.md](../src/main/docker/README.md) - contains + information about the SFTP Docker container + +## ETD Departments + +---- + +**Note**: "ETD Departments" is the human-friendly GUI-based name -- the +Java and Angular source code uses "ETD Units". 
+ +---- + +The XML metadata provided by ProQuest includes one (or more) "DISS_inst_contact" +entries, for example: + +```xml + + + ... + + ... + + ... + English Language and Literature +``` + +Each "DISS_inst_contact" must match an existing "ETD Department" in DRUM, which +is used to map the ETD into the appropriate DRUM collection. + +Each ETD is also added to the DRUM collection specified in the +"drum.etdloader.collection" configuration property. + +## ETD Loader Configuration Properties + +The following properties are used to configure the ETD Loader. + +### drum.etdloader.collection + +The UUID of the collection that all ETD submissions are added to (in addition +to the collection specified in the "DISS_inst_contact" XML property). + +### drum.etdloader.eperson + +The email address of the DRUM EPerson used to load the ETD submissions. + +### drum.etdloader.maxFileSize + +Operational parameter that sets a limit (in bytes) on the size of files that +can be processed by the ETD Loader. + +This parameter is necessary to prevent the ETD Loader from uncompressing a +Zip file entry that exceeds the resource limit of the "drum-cron-ephemeral-vol" +ephemeral volume in Kubernetes (which would cause the pod to reboot). + +If a Zip file contains an entry that exceeds the limit, the entire file will +be skipped, and a message added to the ETD log (and email). + +This parameter is optional -- if not set (or set to "-1") no file size limit +will be enforced. + +### drum.mail.etd.recipient + +Email address that receives the output message from the ETD Loader. + +### drum.mail.duplicate_title + +Email address that receives notifications of duplicate titles from the ETD +Loader. + +## SFTP + +A ProQuest-provided public key is added to the SFTP configuration to enable +ProQuest to upload files. + +See the "docs/Secrets.md" document in the "umd-lib/k8s-drum" repository. 
diff --git a/dspace/docs/DrumFeatures.md b/dspace/docs/DrumFeatures.md index 99e8fc521bb0..b423f02533e3 100644 --- a/dspace/docs/DrumFeatures.md +++ b/dspace/docs/DrumFeatures.md @@ -31,6 +31,8 @@ information. ## Electronic Theses and Dissertations (ETD) +See [dspace/docs/DrumEtdLoader.md](DrumEtdLoader.md) for additional information. + * LIBDRUM-671 - "ETD Department" CRUD functionality * LIBDRUM-680 - Loader for loading ProQuest ETDs into DRUM * transform ProQuest metadata to dublin core From e3d31d724ebd14ce88486e7840a398b0543c13e1 Mon Sep 17 00:00:00 2001 From: "David P. Steelman" Date: Wed, 8 Oct 2025 14:46:40 -0400 Subject: [PATCH 5/5] LIBDRUM-991. Removed "testMainMaxFileSizePropertyHandling" test Removed "testMainMaxFileSizePropertyHandling" because the code under test runs "System.exit", and that interferes with running the tests in Jenkins, where the build fails with the following error: > [ERROR] org.apache.maven.surefire.booter.SurefireBooterForkException: > The forked VM terminated without properly saying goodbye. > VM crash or System.exit called? Investigated ways to work around this, see especially https://stackoverflow.com/a/309427, but none of the solutions seemed particularly future-proof, or required additional third-party libraries which don't look to have been updated in a while. 
https://umd-dit.atlassian.net/browse/LIBDRUM-991 --- .../edu/umd/lib/dspace/app/EtdLoaderTest.java | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java b/dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java index f73c2d067d9f..847b5cb4ffee 100644 --- a/dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java +++ b/dspace/modules/additions/src/test/java/edu/umd/lib/dspace/app/EtdLoaderTest.java @@ -119,22 +119,6 @@ public void testMainEmbargoedItem() throws Exception { assertThat(logOutput, containsString("Embargoes: 1")); assertThat(logOutput, containsString("Embargoed until Tue Jun 26 00:00:00 IST 3027")); } - - @Test - public void testMainMaxFileSizePropertyHandling() throws Exception { - int maxFileSize = 1000; - testEtdLoaderConfig.setEtdLoaderScriptProperties( - "/edu/umd/lib/dspace/app/etdadmin_upload_test_one_item.zip", - eperson, maxFileSize); - - String[] args = new String[0]; - - EtdLoader.main(args); - String logOutput = etdLogger.getLog(); - assertThat(logOutput, containsString("Records read: 0")); - assertThat(logOutput, containsString("Records written: 0")); - assertThat(logOutput, containsString("ERROR: Zip file entry too large")); - } } /**