diff --git a/.gitignore b/.gitignore index 2248714..eeed101 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ docroot/output/*.xml *.db /docroot/csv/abstracts/*.csv +/docroot/csv/* \ No newline at end of file diff --git a/README.md b/README.md index c4dd8bd..3b02e26 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# CSV to OJS XML Import for OJS 3.3.0 +# CSV to OJS XML Import for OJS 3.3.0 (and 3.4, sort of) This application will convert a CSV file into the OJS XML native import file. The XSD is included with this project in the `docroot/output` directory. Sample CSV files for both users and issues are included in the `examples` diff --git a/app/classes/IssuesXmlBuilder.php b/app/classes/IssuesXmlBuilder.php index 0f8ba91..32daf0b 100644 --- a/app/classes/IssuesXmlBuilder.php +++ b/app/classes/IssuesXmlBuilder.php @@ -7,7 +7,8 @@ define("ISSUE_COVERS_DIR", "csv/abstracts/issue_cover_images/"); -class IssuesXmlBuilder extends XMLBuilder { +class IssuesXmlBuilder extends XMLBuilder +{ /** @var array $_sectionAbbreviations */ private array $_sectionAbbreviations = array(); private int $_iteration = 0; @@ -25,7 +26,8 @@ class IssuesXmlBuilder extends XMLBuilder { * @param string $articleGalleysDir * @param string $user */ - public function __construct($filePath, &$dbManager, $issueCoversDir, $articleGalleysDir, $user) { + public function __construct($filePath, &$dbManager, $issueCoversDir, $articleGalleysDir, $user) + { parent::__construct($filePath, $dbManager); $this->_issueCoversDir = $issueCoversDir; $this->_articleGalleysDir = $articleGalleysDir; @@ -35,14 +37,16 @@ public function __construct($filePath, &$dbManager, $issueCoversDir, $articleGal /** * @param int $iteration Current loop of all files being written */ - public function setIteration($iteration) { - $this->_iteration =$iteration; + public function setIteration($iteration) + { + $this->_iteration = $iteration; } /** * Issue builder where one builder results one xml file for up to 
ISSUES_PER_FILE issues */ - public function buildXml() { + public function buildXml() + { $this->getXmlWriter()->startElement("issues"); $this->_setXmlnsAttributes(true); @@ -67,7 +71,8 @@ public function buildXml() { * * @param array $issueData */ - function writeIssue($issueData) { + function writeIssue($issueData) + { $this->getXmlWriter()->startElement("issue"); $this->_setXmlnsAttributes(); $this->getXmlWriter()->writeAttribute("published", "1"); @@ -85,7 +90,8 @@ function writeIssue($issueData) { * * @param array $issueData Metadata about a single issue */ - function writeIssueMetadata($issueData) { + function writeIssueMetadata($issueData) + { $this->getXmlWriter()->startElement("issue_identification"); if ($issueData['volume'] != "") { @@ -108,13 +114,13 @@ function writeIssueMetadata($issueData) { $this->addLocaleAttribute(); $this->getXmlWriter()->writeRaw(xmlFormat($issueData["issueTitle"])); $this->getXmlWriter()->endElement(); - - if(trim($issueData["issueTitle_2"])!=''){ - $this->getXmlWriter()->startElement("title"); - $this->addLocaleAttribute($issueData["locale_2"]); - $this->getXmlWriter()->writeRaw(xmlFormat($issueData["issueTitle_2"])); - $this->getXmlWriter()->endElement(); - } + + if (trim($issueData["issueTitle_2"]) != '') { + $this->getXmlWriter()->startElement("title"); + $this->addLocaleAttribute($issueData["locale_2"]); + $this->getXmlWriter()->writeRaw(xmlFormat($issueData["issueTitle_2"])); + $this->getXmlWriter()->endElement(); + } $this->getXmlWriter()->endElement(); $this->getXmlWriter()->startElement("date_published"); @@ -128,7 +134,8 @@ function writeIssueMetadata($issueData) { * @param string $titleName Issue title * @param string $volume Volume number */ - function writeSections($titleName, $volume, $issue) { + function writeSections($titleName, $volume, $issue) + { $sectionsData = $this->getDBManager()->getSectionsData($titleName, $volume, $issue); $this->getXmlWriter()->startElement("sections"); @@ -145,7 +152,8 @@ function 
writeSections($titleName, $volume, $issue) { * * @param array $sectionData */ - function writeSection($sectionData) { + function writeSection($sectionData) + { $this->getXmlWriter()->startElement("section"); $sectionAbbrev = xmlFormat($sectionData["sectionAbbrev"]); $this->getXmlWriter()->writeAttribute("ref", $sectionAbbrev); @@ -163,13 +171,13 @@ function writeSection($sectionData) { $this->addLocaleAttribute(); $this->getXmlWriter()->writeRaw(xmlFormat($sectionData["sectionTitle"])); $this->getXmlWriter()->endElement(); - - if($sectionData["sectionTitle_2"] != ''){ - $this->getXmlWriter()->startElement("title"); - $this->addLocaleAttribute($sectionData["locale_2"]); - $this->getXmlWriter()->writeRaw(xmlFormat($sectionData["sectionTitle_2"])); - $this->getXmlWriter()->endElement(); - } + + if ($sectionData["sectionTitle_2"] != '') { + $this->getXmlWriter()->startElement("title"); + $this->addLocaleAttribute($sectionData["locale_2"]); + $this->getXmlWriter()->writeRaw(xmlFormat($sectionData["sectionTitle_2"])); + $this->getXmlWriter()->endElement(); + } $this->getXmlWriter()->endElement(); } @@ -178,7 +186,8 @@ function writeSection($sectionData) { * * @param array $issueData */ - function writeIssueCover($issueData) { + function writeIssueCover($issueData) + { if (trim($issueData["cover_image_filename"] == "")) return; $path = $this->_issueCoversDir . 
$issueData["cover_image_filename"]; @@ -205,7 +214,6 @@ function writeIssueCover($issueData) { $this->getXmlWriter()->endElement(); $this->getXmlWriter()->endElement(); - } /** @@ -213,7 +221,8 @@ function writeIssueCover($issueData) { * * @param array $issueData */ - function writeArticles($issueData) { + function writeArticles($issueData) + { $this->getXmlWriter()->startElement("articles"); $this->_setXmlnsAttributes(); @@ -250,11 +259,12 @@ function writeArticles($issueData) { * * @param array $articleData */ - function writeArticle($articleData) { + function writeArticle($articleData) + { $this->getXmlWriter()->startElement("article"); - $this->getXmlWriter()->writeAttribute("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance"); + $this->getXmlWriter()->writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); $this->getXmlWriter()->writeAttribute("status", "3"); - $this->getXmlWriter()->writeAttribute("stage" ,"production"); + $this->getXmlWriter()->writeAttribute("stage", "production"); $this->getXmlWriter()->writeAttribute("current_publication_id", $articleData["currentId"]); $this->_writeIdElement($articleData["currentId"]); @@ -264,38 +274,42 @@ function writeArticle($articleData) { $this->writePublication($articleData); $this->getXmlWriter()->endElement(); - } - function _writeSubmissionFile(array $articleData) { + function _writeSubmissionFile(array $articleData) + { if (trim($articleData["fileName"] == "")) return; $path = $this->_articleGalleysDir . 
$articleData["fileName"]; - $filesize = filesize($path); + $filesize = filesize($path); $type = pathinfo($path, PATHINFO_EXTENSION); $data = file_get_contents($path); $articleGalleyBase64 = base64_encode($data); $this->getXmlWriter()->startElement("submission_file"); - $this->getXmlWriter()->writeAttribute("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance"); + $this->getXmlWriter()->writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); $this->getXmlWriter()->writeAttribute("id", $articleData["currentId"]); - $this->getXmlWriter()->writeAttribute("file_id", $articleData["currentId"]); + $this->getXmlWriter()->writeAttribute("file_id", $articleData["currentId"]); $this->getXmlWriter()->writeAttribute("stage", "proof"); $this->getXmlWriter()->writeAttribute("viewable", "false"); $this->getXmlWriter()->writeAttribute("genre", $this->_getGenreName()); $this->getXmlWriter()->writeAttribute("uploader", $this->_user); $this->getXmlWriter()->writeAttribute("xsi:schemaLocation", "http://pkp.sfu.ca native.xsd"); - $this->getXmlWriter()->startElement("name"); + $this->getXmlWriter()->startElement("name"); $this->addLocaleAttribute(); - $this->getXmlWriter()->writeRaw($this->_user . ", " . $articleData["fileName"]); + + //The filename isn't always XML safe, filter it first + $fileNameSafe = htmlspecialchars($articleData["fileName"], ENT_XML1, 'UTF-8'); + + $this->getXmlWriter()->writeRaw($this->_user . ", " . 
$fileNameSafe); $this->getXmlWriter()->endElement(); $this->getXmlWriter()->startElement("file"); - $this->getXmlWriter()->writeAttribute("id", $articleData["currentId"]); - $this->getXmlWriter()->writeAttribute("filesize", $filesize); - $this->getXmlWriter()->writeAttribute("extension", $type); + $this->getXmlWriter()->writeAttribute("id", $articleData["currentId"]); + $this->getXmlWriter()->writeAttribute("filesize", $filesize); + $this->getXmlWriter()->writeAttribute("extension", $type); $this->getXmlWriter()->startElement("embed"); $this->getXmlWriter()->writeAttribute("encoding", "base64"); @@ -313,12 +327,21 @@ function _writeSubmissionFile(array $articleData) { * * @param array $articleData */ - function writePublication($articleData) { + function writePublication($articleData) + { $this->getXmlWriter()->startElement("publication"); - $this->getXmlWriter()->writeAttribute("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance"); - $this->addLocaleAttribute(); + $this->getXmlWriter()->writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); + //Don't do this if the destination is OJS 3.4 + if (!Config::get("is34")) { + $this->addLocaleAttribute(); + } $this->getXmlWriter()->writeAttribute("version", "1"); $this->getXmlWriter()->writeAttribute("status", "3"); + + //Try to avoid database integrity constraint violation for back issue import to 3.4 + if (Config::get("is34") && Config::get("isBackIssues")) { + $this->getXmlWriter()->writeAttribute("primary_contact_id", Config::get("defaultAuthorId")); + } $this->getXmlWriter()->writeAttribute("date_published", date("Y-m-d", strtotime(trim($articleData["datePublished"])))); $this->getXmlWriter()->writeAttribute("section_ref", $articleData["sectionAbbrev"]); $this->getXmlWriter()->writeAttribute("seq", $articleData["publicationSeq"]); @@ -328,9 +351,9 @@ function writePublication($articleData) { $this->writePublicationMetadata($articleData); $this->writeAuthors($articleData); 
$this->writeArticleGalley($articleData); - - $this->writeCitations($articleData["citations"]); - + + $this->writeCitations($articleData["citations"]); + $this->getXmlWriter()->startElement("pages"); $this->getXmlWriter()->writeRaw(trim($articleData["pages"], "-")); @@ -338,11 +361,12 @@ function writePublication($articleData) { $this->getXmlWriter()->endElement(); } - - - function writeCitations($citationString){ - - if ($citationString != "") { + + + function writeCitations($citationString) + { + + if ($citationString != "") { $citations = parseNewLine($citationString); $this->getXmlWriter()->startElement("citations"); $this->addLocaleAttribute(); @@ -353,16 +377,16 @@ function writeCitations($citationString){ } $this->getXmlWriter()->endElement(); } - - } - + } + /** * Writes out publication metadata, including, title, abstract, keywords, etc. * * @param array $articleData */ - function writePublicationMetadata($articleData) { + function writePublicationMetadata($articleData) + { $doi = trim($articleData["DOI"]); if ($doi != "") { @@ -377,13 +401,13 @@ function writePublicationMetadata($articleData) { $this->addLocaleAttribute(); $this->getXmlWriter()->writeRaw(xmlFormat(trim($articleData["articleTitle"]))); $this->getXmlWriter()->endElement(); - - if($articleData["articleTitle_2"] != ''){ - $this->getXmlWriter()->startElement("title"); - $this->addLocaleAttribute($articleData["locale_2"]); - $this->getXmlWriter()->writeRaw(xmlFormat(trim($articleData["articleTitle_2"]))); - $this->getXmlWriter()->endElement(); - } + + if ($articleData["articleTitle_2"] != '') { + $this->getXmlWriter()->startElement("title"); + $this->addLocaleAttribute($articleData["locale_2"]); + $this->getXmlWriter()->writeRaw(xmlFormat(trim($articleData["articleTitle_2"]))); + $this->getXmlWriter()->endElement(); + } if (isset($articleData["subTitle"]) && trim($articleData["subTitle"]) != "") { $this->getXmlWriter()->startElement("subtitle"); @@ -394,34 +418,34 @@ function 
writePublicationMetadata($articleData) { $this->getXmlWriter()->startElement("abstract"); $this->addLocaleAttribute(); - $this->getXmlWriter()->writeRaw(xmlFormat(trim($articleData["abstract"])??''))); - $this->getXmlWriter()->endElement(); - - if($articleData["articleAbstract_2"] != ''){ - $this->getXmlWriter()->startElement("abstract"); - $this->addLocaleAttribute($articleData["locale_2"]); - $this->getXmlWriter()->writeRaw(xmlFormat( trim($articleData["articleAbstract_2"]) )); - $this->getXmlWriter()->endElement(); - } - - - - $this->getXmlWriter()->startElement("licenseUrl"); + $this->getXmlWriter()->writeRaw(xmlFormat(trim($articleData["abstract"]) ?? '')); + $this->getXmlWriter()->endElement(); + + if ($articleData["articleAbstract_2"] != '') { + $this->getXmlWriter()->startElement("abstract"); + $this->addLocaleAttribute($articleData["locale_2"]); + $this->getXmlWriter()->writeRaw(xmlFormat(trim($articleData["articleAbstract_2"]))); + $this->getXmlWriter()->endElement(); + } + + + + $this->getXmlWriter()->startElement("licenseUrl"); $this->getXmlWriter()->writeRaw(xmlFormat(trim($articleData["licenseUrl"]))); $this->getXmlWriter()->endElement(); - - $this->getXmlWriter()->startElement("copyrightHolder"); - $this->addLocaleAttribute(); + + $this->getXmlWriter()->startElement("copyrightHolder"); + $this->addLocaleAttribute(); $this->getXmlWriter()->writeRaw(xmlFormat(trim($articleData["copyrightHolder"]))); $this->getXmlWriter()->endElement(); - - if(trim($articleData["copyrightYear"]) != ""){ - $this->getXmlWriter()->startElement("copyrightYear"); - $this->getXmlWriter()->writeRaw(xmlFormat(trim($articleData["copyrightYear"]))); - $this->getXmlWriter()->endElement(); - } - - + + if (trim($articleData["copyrightYear"]) != "") { + $this->getXmlWriter()->startElement("copyrightYear"); + $this->getXmlWriter()->writeRaw(xmlFormat(trim($articleData["copyrightYear"]))); + $this->getXmlWriter()->endElement(); + } + + if (semiColonFix($articleData["keywords"] != "")) 
{ $keywordArray = parseSemiColon($articleData["keywords"]); @@ -434,11 +458,6 @@ function writePublicationMetadata($articleData) { } $this->getXmlWriter()->endElement(); } - - - - - } /** @@ -446,7 +465,8 @@ function writePublicationMetadata($articleData) { * * @param array $articleData */ - function writeAuthors($articleData) { + function writeAuthors($articleData) + { $authors = new Authors($articleData["authors"], $articleData["authorEmail"], $articleData["affiliations"]); $this->getXmlWriter()->startElement("authors"); @@ -461,7 +481,6 @@ function writeAuthors($articleData) { } $this->getXmlWriter()->endElement(); - } /** @@ -469,7 +488,8 @@ function writeAuthors($articleData) { * * @param array $autorData */ - function writeAuthor($autorData) { + function writeAuthor($autorData) + { $this->getXmlWriter()->startElement("author"); $this->getXmlWriter()->writeAttribute("user_group_ref", "Author"); // First author in list is considered primary contact @@ -477,7 +497,13 @@ function writeAuthor($autorData) { $this->getXmlWriter()->writeAttribute("primary_contact", "true"); } $this->getXmlWriter()->writeAttribute("seq", $autorData["seq"]); - $this->getXmlWriter()->writeAttribute("id", $autorData["currentId"]); + + //Try to avoid database integrity constraint violation for back issue import to 3.4 + if (Config::get("is34") && Config::get("isBackIssues")) { + $this->getXmlWriter()->writeAttribute("id", Config::get("defaultAuthorId")); + } else { + $this->getXmlWriter()->writeAttribute("id", $autorData["currentId"]); + } $this->getXmlWriter()->startElement("givenname"); $this->addLocaleAttribute(); @@ -509,17 +535,18 @@ function writeAuthor($autorData) { $this->getXmlWriter()->endElement(); } - function writeArticleGalley($articleData) { + function writeArticleGalley($articleData) + { $fileName = $articleData["fileName"]; $fileExt = get_file_extension($fileName); // Disabled for OJS 3.2 -// $pdfUrl = Config::get("pdf_url"); + // $pdfUrl = Config::get("pdf_url"); 
$this->getXmlWriter()->startElement("article_galley"); - $this->getXmlWriter()->writeAttribute("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance"); + $this->getXmlWriter()->writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); $this->addLocaleAttribute(); $this->getXmlWriter()->writeAttribute("approved", "false"); - $this->getXmlWriter()->writeAttribute("xsi:schemaLocation","http://pkp.sfu.ca native.xsd"); + $this->getXmlWriter()->writeAttribute("xsi:schemaLocation", "http://pkp.sfu.ca native.xsd"); $this->_writeIdElement($articleData["currentId"]); @@ -537,9 +564,9 @@ function writeArticleGalley($articleData) { $this->getXmlWriter()->endElement(); // Disabled for OJS 3.2 -// $this->getXmlWriter()->startElement("remote"); -// $this->getXmlWriter()->writeAttribute("src", $pdfUrl . xmlFormat($fileName)); -// $this->getXmlWriter()->endElement(); + // $this->getXmlWriter()->startElement("remote"); + // $this->getXmlWriter()->writeAttribute("src", $pdfUrl . xmlFormat($fileName)); + // $this->getXmlWriter()->endElement(); $this->getXmlWriter()->endElement(); } @@ -549,11 +576,12 @@ function writeArticleGalley($articleData) { /** * @param false $includeSchemaLocation Includes xsi schema location */ - function _setXmlnsAttributes($includeSchemaLocation = false) { - $this->getXmlWriter()->writeAttribute("xmlns","http://pkp.sfu.ca"); - $this->getXmlWriter()->writeAttribute("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance"); + function _setXmlnsAttributes($includeSchemaLocation = false) + { + $this->getXmlWriter()->writeAttribute("xmlns", "http://pkp.sfu.ca"); + $this->getXmlWriter()->writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); if ($includeSchemaLocation) { - $this->getXmlWriter()->writeAttribute("xsi:schemaLocation","http://pkp.sfu.ca native.xsd"); + $this->getXmlWriter()->writeAttribute("xsi:schemaLocation", "http://pkp.sfu.ca native.xsd"); } } @@ -564,7 +592,8 @@ function 
_setXmlnsAttributes($includeSchemaLocation = false) { * * @param $currentId */ - function _writeIdElement($currentId) { + function _writeIdElement($currentId) + { $this->getXmlWriter()->startElement("id"); $this->getXmlWriter()->writeAttribute("type", "internal"); $this->getXmlWriter()->writeAttribute("advice", "ignore"); @@ -572,7 +601,8 @@ function _writeIdElement($currentId) { $this->getXmlWriter()->endElement(); } - function _getGenreName() { + function _getGenreName() + { $customFileGenre = Config::get('genreName'); if (!empty($customFileGenre)) { return $customFileGenre; diff --git a/config.ini b/config.ini index 8d79263..21c87d1 100644 --- a/config.ini +++ b/config.ini @@ -14,7 +14,10 @@ pdf_url = http://127.0.0.1/ ; Required fields for OJS. To be applied across all conversions author_country = "CA" -locale = "en_US" + +;locale = "en_US" +;For OJS 3.4 you must set this to just 'en' +locale = "en" ; For use when formatting dates via DateTime::createFromFormat() dateFormat = "d/m/Y" @@ -24,3 +27,18 @@ genreName = "" ; Outputs info written to console to a file for reference logLocation = "/tmp" + +;Set whether we're doing a back issues import +;Applies to 3.4 only - we want everything under the default author id to avoid database integrity constraint violation +isBackIssues = True + +; Default author id +; For a back issues import, choose the id of an existing author, such as an editor +defaultAuthorId = 12345 + +; Set whether destination OJS is version 3.4 +is34 = True + +; To get this to work on 3.4, a couple of hacky modifications to: +; plugins/importexport/native/filter/NativeXmlIssueFilter.php +; on lines 340 and 347 are required (casts to float and int respectively) \ No newline at end of file diff --git a/docroot/csv/abstracts/article_galleys/.gitkeep b/docroot/csv/abstracts/article_galleys/.gitkeep deleted file mode 100644 index e69de29..0000000