From 6c914d2758a72b0157b4613f4d1dc0ab6ad6420d Mon Sep 17 00:00:00 2001 From: Steve Perkins Date: Tue, 1 Jul 2025 13:48:15 -0400 Subject: [PATCH 1/4] partial ddex 4 support. --- .gitignore | 3 ++- src/parseDelivery.ts | 22 ++++++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index e66c6ac..4f4f594 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ reports .DS_Store local-storage -src/reporting/data/* \ No newline at end of file +src/reporting/data/* +data diff --git a/src/parseDelivery.ts b/src/parseDelivery.ts index 246f71c..3f457bf 100644 --- a/src/parseDelivery.ts +++ b/src/parseDelivery.ts @@ -56,6 +56,7 @@ type ReleaseAndSoundRecordingSharedFields = { copyrightLine?: CopyrightPair producerCopyrightLine?: CopyrightPair parentalWarningType?: string + artistsWip: string artists: DDEXContributor[] contributors: DDEXContributor[] indirectContributors: DDEXContributor[] @@ -414,6 +415,15 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { const imageResources: Record = {} const textResources: Record = {} + const partList: Record = {} + + $('PartyList > Party').each((_, el) => { + const $el = $(el) + const ref = $el.find('PartyReference').text() + const name = $el.find('FullName').text() + partList[ref] = name + }) + $('ResourceList > SoundRecording').each((_, el) => { const $el = $(el) @@ -427,6 +437,7 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { fileName: $el.find('FileName:first').text(), title: $el.find('TitleText:first').text(), subTitle: $el.find('SubTitle:first').text(), + artistsWip: $el.find('DisplayArtistName').text(), artists: parseContributor('DisplayArtist', $el), contributors: parseContributor('ResourceContributor', $el), indirectContributors: parseContributor( @@ -450,6 +461,8 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { parentalWarningType: toText($el.find('ParentalWarningType')), } + console.log(recording) + const rightsController = $el.find('RightsController').first() if (rightsController.length) { recording.rightsController = { @@ -508,7 +521,7 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { const release: DDEXRelease = { ref, - title: $el.find('ReferenceTitle TitleText').text(), + title: $el.find('ReferenceTitle TitleText, Release TitleText').text(), subTitle: $el.find('ReferenceTitle SubTitle').text(), artists: parseContributor('DisplayArtist', $el), contributors: parseContributor('ResourceContributor', $el), @@ -550,7 +563,12 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { // resolve resources $el - .find('ReleaseResourceReferenceList > ReleaseResourceReference') + .find( + ` + ReleaseResourceReferenceList > ReleaseResourceReference, + ResourceGroup ReleaseResourceReference + ` + ) .each((_, el) => { const ref = $(el).text() From 6275ddc65271a41b896cfb35447e2adfc6f10f0f Mon Sep 17 00:00:00 2001 From: Raymond Jacobson Date: Thu, 3 Jul 2025 15:24:55 -0700 Subject: [PATCH 2/4] 4.0 support --- src/parseDelivery.ts | 531 +++++++++++++++++++++++++++++-------------- 1 file changed, 364 insertions(+), 167 deletions(-) diff --git a/src/parseDelivery.ts b/src/parseDelivery.ts index 3f457bf..3b6a294 100644 --- a/src/parseDelivery.ts +++ b/src/parseDelivery.ts @@ -56,7 +56,6 @@ type ReleaseAndSoundRecordingSharedFields = { copyrightLine?: CopyrightPair producerCopyrightLine?: CopyrightPair parentalWarningType?: string - artistsWip: string artists: DDEXContributor[] contributors: DDEXContributor[] indirectContributors: DDEXContributor[] @@ -198,6 +197,9 @@ export async function parseDdexXml( ].find((n) => rawTagName.includes(n)) const isUpdate = $('UpdateIndicator').text() == 'UpdateMessage' + // Detect DDEX version + const isDdex40 = rawTagName.includes('ernm:') || xmlText.includes('http://ddex.net/xml/ern/4') + // todo: would be nice to skip this on reParse await xmlRepo.upsert({ source, @@ -218,7 +220,7 @@ export async function parseDdexXml( ) } else if (tagName == 'NewReleaseMessage') { // create or replace this release in db - const releases = await parseReleaseXml(source, $) + const releases = await parseReleaseXml(source, $, isDdex40) for (const release of releases) { await releaseRepo.upsert(source, xmlUrl, messageTimestamp, release) } @@ -231,7 +233,7 @@ export async function parseDdexXml( // // parseRelease // -async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { +async function parseReleaseXml(source: string, $: cheerio.CheerioAPI, isDdex40: boolean) { function toTexts($doc: CH) { return $doc.map((_, el) => $(el).text()).get() } @@ -249,12 +251,19 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { } function parseGenres($el: CH): [genre: string, subGenre: string] { - const genres = toTexts($el.find('GenreText')) - let subGenres = toTexts($el.find('SubGenre')) - if (!subGenres.length) { - subGenres = genres.slice(1) + if (isDdex40) { + // DDEX 4.0 structure + const genreText = toText($el.find('Genre > GenreText')) + return [genreText || '', ''] + } else { + // DDEX 3.8 structure + const genres = toTexts($el.find('GenreText')) + let subGenres = toTexts($el.find('SubGenre')) + if (!subGenres.length) { + subGenres = genres.slice(1) + } + return [genres[0] || '', subGenres[0] || ''] } - return [genres[0] || '', subGenres[0] || ''] } function parseContributor( @@ -264,137 +273,310 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { | 'IndirectResourceContributor', $el: CH ): DDEXContributor[] { - const roleTagName = - tagName == 'DisplayArtist' ? 'ArtistRole' : `${tagName}Role` - - return $el - .find(tagName) - .toArray() - .map((el) => { - const roleTag = $(el).find(roleTagName).first() - return { - name: toText($(el).find('FullName')), - role: roleTag.attr('UserDefinedValue') || roleTag.text(), - } - }) + if (isDdex40) { + // DDEX 4.0 structure + if (tagName === 'DisplayArtist') { + return $el + .find('DisplayArtist') + .toArray() + .map((el) => { + const partyRef = $(el).find('ArtistPartyReference').text() + const role = $(el).find('DisplayArtistRole').text() + return { + name: partList[partyRef] || partyRef, + role: role, + } + }) + } else if (tagName === 'ResourceContributor' || tagName === 'IndirectResourceContributor') { + return $el + .find('Contributor') + .toArray() + .map((el) => { + const partyRef = $(el).find('ContributorPartyReference').text() + const roleTag = $(el).find('Role').first() + const role = roleTag.attr('UserDefinedValue') || roleTag.text() + return { + name: partList[partyRef] || partyRef, + role: role, + } + }) + } + } else { + // DDEX 3.8 structure + const roleTagName = + tagName == 'DisplayArtist' ? 'ArtistRole' : `${tagName}Role` + + return $el + .find(tagName) + .toArray() + .map((el) => { + const roleTag = $(el).find(roleTagName).first() + return { + name: toText($(el).find('FullName')), + role: roleTag.attr('UserDefinedValue') || roleTag.text(), + } + }) + } + return [] + } + + // + // Build party resolution map for DDEX 4.0 + // + const partList: Record = {} + + if (isDdex40) { + $('PartyList > Party').each((_, el) => { + const $el = $(el) + const ref = $el.find('PartyReference').text() + const name = $el.find('PartyName > FullName').text() + partList[ref] = name + }) + } else { + // Legacy: For DDEX 3.8, build the existing partyList + $('PartyList > Party').each((_, el) => { + const $el = $(el) + const ref = $el.find('PartyReference').text() + const name = $el.find('FullName').text() + partList[ref] = name + }) } // // parse deals // const releaseDeals: Record = {} - $('ReleaseDeal').each((_, el) => { - const $el = $(el) - const ref = $el.find('DealReleaseReference').text() - $el.find('DealTerms').each((_, el) => { + + if (isDdex40) { + // DDEX 4.0 structure: DealList > ReleaseDeal > Deal > DealTerms + $('DealList > ReleaseDeal').each((_, el) => { const $el = $(el) + const ref = $el.find('ReleaseReference').text() + + $el.find('Deal > DealTerms').each((_, el) => { + const $el = $(el) + + const cmt = $el.find('CommercialModelType') + const commercialModelType = cmt.attr('UserDefinedValue') || cmt.text() + const usageTypes = toTexts($el.find('UseType')) + const territoryCode = toTexts($el.find('TerritoryCode')) + const validityStartDate = $el.find('ValidityPeriod > StartDateTime').text() + const validityEndDate = $el.find('ValidityPeriod > EndDateTime').text() + + // only consider Worldwide + const isWorldwide = territoryCode.includes('Worldwide') || territoryCode.length > 100 // Many territories listed means worldwide + if (!isWorldwide) { + return + } - const cmt = $el.find('CommercialModelType') - const commercialModelType = cmt.attr('UserDefinedValue') || cmt.text() - const usageTypes = toTexts($el.find('UseType')) - const territoryCode = toTexts($el.find('TerritoryCode')) - const validityStartDate = $el.find('ValidityPeriod > StartDate').text() - const validityEndDate = $el.find('ValidityPeriod > EndDate').text() - - // only consider Worldwide - const isWorldwide = territoryCode.includes('Worldwide') - if (!isWorldwide) { - return - } + // check date range + { + const startDate = new Date(validityStartDate) + const endDate = new Date(validityEndDate) + const now = new Date() + if (startDate && now < startDate) { + return + } + if (endDate && now > endDate) { + return + } + } - // check date range - { - const startDate = new Date(validityStartDate) - const endDate = new Date(validityEndDate) - const now = new Date() - if (startDate && now < startDate) { - return + // add deal + function addDeal(deal: AudiusSupportedDeal) { + releaseDeals[ref] ||= [] + releaseDeals[ref].push(deal) } - if (endDate && now > endDate) { - return + + const common: DealFields = { + forStream: + usageTypes.includes('OnDemandStream') || + usageTypes.includes('Stream'), + forDownload: usageTypes.includes('PermanentDownload'), + validityStartDate, + validityEndDate, } - } - // add deal - function addDeal(deal: AudiusSupportedDeal) { - releaseDeals[ref] ||= [] - releaseDeals[ref].push(deal) - } + if (commercialModelType == 'FreeOfChargeModel') { + addDeal({ + ...common, + audiusDealType: 'Free', + }) + } else if (commercialModelType == 'PayAsYouGoModel') { + const deal: DealPayGated = { + ...common, + audiusDealType: 'PayGated', + } + const priceUsd = parseFloat( + $el.find('WholesalePricePerUnit[CurrencyCode="USD"]').text() + ) + if (priceUsd) { + deal.priceUsd = priceUsd + } + addDeal(deal) + } else if ( + commercialModelType == 'FollowGated' || + commercialModelType == 'TipGated' + ) { + addDeal({ + ...common, + audiusDealType: commercialModelType, + }) + } else if (commercialModelType == 'NFTGated') { + const chain = $el.find('Chain').text() + const address = $el.find('Address').text() + const name = $el.find('Name').text() + const imageUrl = $el.find('ImageUrl').text() + const externalLink = $el.find('ExternalLink').text() + + // eth specific + const standard = $el.find('Standard').text() + const slug = $el.find('Slug').text() + + switch (chain) { + case 'eth': + addDeal({ + ...common, + audiusDealType: 'NFTGated', + chain, + address, + name, + imageUrl, + externalLink, + standard, + slug, + }) + break + case 'sol': + addDeal({ + ...common, + audiusDealType: 'NFTGated', + chain, + address, + name, + imageUrl, + externalLink, + }) + break + } + } + }) + }) + } else { + // DDEX 3.8 structure: ReleaseDeal + $('ReleaseDeal').each((_, el) => { + const $el = $(el) + const ref = $el.find('DealReleaseReference').text() + $el.find('DealTerms').each((_, el) => { + const $el = $(el) + + const cmt = $el.find('CommercialModelType') + const commercialModelType = cmt.attr('UserDefinedValue') || cmt.text() + const usageTypes = toTexts($el.find('UseType')) + const territoryCode = toTexts($el.find('TerritoryCode')) + const validityStartDate = $el.find('ValidityPeriod > StartDate').text() + const validityEndDate = $el.find('ValidityPeriod > EndDate').text() + + // only consider Worldwide + const isWorldwide = territoryCode.includes('Worldwide') + if (!isWorldwide) { + return + } - const common: DealFields = { - forStream: - usageTypes.includes('OnDemandStream') || - usageTypes.includes('Stream'), - forDownload: usageTypes.includes('PermanentDownload'), - validityStartDate, - validityEndDate, - } + // check date range + { + const startDate = new Date(validityStartDate) + const endDate = new Date(validityEndDate) + const now = new Date() + if (startDate && now < startDate) { + return + } + if (endDate && now > endDate) { + return + } + } - if (commercialModelType == 'FreeOfChargeModel') { - addDeal({ - ...common, - audiusDealType: 'Free', - }) - } else if (commercialModelType == 'PayAsYouGoModel') { - const deal: DealPayGated = { - ...common, - audiusDealType: 'PayGated', + // add deal + function addDeal(deal: AudiusSupportedDeal) { + releaseDeals[ref] ||= [] + releaseDeals[ref].push(deal) } - const priceUsd = parseFloat( - $el.find('WholesalePricePerUnit[CurrencyCode="USD"]').text() - ) - if (priceUsd) { - deal.priceUsd = priceUsd + + const common: DealFields = { + forStream: + usageTypes.includes('OnDemandStream') || + usageTypes.includes('Stream'), + forDownload: usageTypes.includes('PermanentDownload'), + validityStartDate, + validityEndDate, } - addDeal(deal) - } else if ( - commercialModelType == 'FollowGated' || - commercialModelType == 'TipGated' - ) { - addDeal({ - ...common, - audiusDealType: commercialModelType, - }) - } else if (commercialModelType == 'NFTGated') { - const chain = $el.find('Chain').text() - const address = $el.find('Address').text() - const name = $el.find('Name').text() - const imageUrl = $el.find('ImageUrl').text() - const externalLink = $el.find('ExternalLink').text() - - // eth specific - const standard = $el.find('Standard').text() - const slug = $el.find('Slug').text() - - switch (chain) { - case 'eth': - addDeal({ - ...common, - audiusDealType: 'NFTGated', - chain, - address, - name, - imageUrl, - externalLink, - standard, - slug, - }) - break - case 'sol': - addDeal({ - ...common, - audiusDealType: 'NFTGated', - chain, - address, - name, - imageUrl, - externalLink, - }) - break + + if (commercialModelType == 'FreeOfChargeModel') { + addDeal({ + ...common, + audiusDealType: 'Free', + }) + } else if (commercialModelType == 'PayAsYouGoModel') { + const deal: DealPayGated = { + ...common, + audiusDealType: 'PayGated', + } + const priceUsd = parseFloat( + $el.find('WholesalePricePerUnit[CurrencyCode="USD"]').text() + ) + if (priceUsd) { + deal.priceUsd = priceUsd + } + addDeal(deal) + } else if ( + commercialModelType == 'FollowGated' || + commercialModelType == 'TipGated' + ) { + addDeal({ + ...common, + audiusDealType: commercialModelType, + }) + } else if (commercialModelType == 'NFTGated') { + const chain = $el.find('Chain').text() + const address = $el.find('Address').text() + const name = $el.find('Name').text() + const imageUrl = $el.find('ImageUrl').text() + const externalLink = $el.find('ExternalLink').text() + + // eth specific + const standard = $el.find('Standard').text() + const slug = $el.find('Slug').text() + + switch (chain) { + case 'eth': + addDeal({ + ...common, + audiusDealType: 'NFTGated', + chain, + address, + name, + imageUrl, + externalLink, + standard, + slug, + }) + break + case 'sol': + addDeal({ + ...common, + audiusDealType: 'NFTGated', + chain, + address, + name, + imageUrl, + externalLink, + }) + break + } } - } + }) }) - }) + } // after parsing deals... if there is only a forDownload deal and no forStream deal // mark the forDownload deal as forStream... which will make this a pay gated track. @@ -415,15 +597,6 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { const imageResources: Record = {} const textResources: Record = {} - const partList: Record = {} - - $('PartyList > Party').each((_, el) => { - const $el = $(el) - const ref = $el.find('PartyReference').text() - const name = $el.find('FullName').text() - partList[ref] = name - }) - $('ResourceList > SoundRecording').each((_, el) => { const $el = $(el) @@ -431,38 +604,49 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { const recording: DDEXSoundRecording = { ref: $el.find('ResourceReference').text(), - isrc: $el.find('ISRC').text(), - - filePath: $el.find('FilePath:first').text(), - fileName: $el.find('FileName:first').text(), - title: $el.find('TitleText:first').text(), - subTitle: $el.find('SubTitle:first').text(), - artistsWip: $el.find('DisplayArtistName').text(), + isrc: isDdex40 + ? $el.find('SoundRecordingEdition > ResourceId > ISRC').text() + : $el.find('ISRC').text(), + + filePath: isDdex40 + ? $el.find('SoundRecordingEdition > TechnicalDetails > DeliveryFile > File > URI').text() + : $el.find('FilePath:first').text(), + fileName: isDdex40 + ? $el.find('SoundRecordingEdition > TechnicalDetails > DeliveryFile > File > URI').text().split('/').pop() || '' + : $el.find('FileName:first').text(), + title: isDdex40 + ? $el.find('DisplayTitle > TitleText:first').text() + : $el.find('TitleText:first').text(), + subTitle: isDdex40 + ? $el.find('DisplayTitle > SubTitle:first').text() + : $el.find('SubTitle:first').text(), artists: parseContributor('DisplayArtist', $el), contributors: parseContributor('ResourceContributor', $el), indirectContributors: parseContributor( 'IndirectResourceContributor', $el ), - labelName: $el.find('LabelName').text(), + labelName: isDdex40 + ? '' // Label name is not directly in SoundRecording for DDEX 4.0 + : $el.find('LabelName').text(), duration: parseDuration($el.find('Duration').text()), - previewStartSeconds: parseInt( - $el.find('PreviewDetails > StartPoint:first').text() - ), + previewStartSeconds: isDdex40 + ? parseInt($el.find('ClipDetails > Timing > StartPoint:first').text()) / 1000 + : parseInt($el.find('PreviewDetails > StartPoint:first').text()), genre: genre, subGenre: subGenre, - releaseDate: $el - .find('OriginalResourceReleaseDate, ResourceReleaseDate') - .first() - .text(), + releaseDate: isDdex40 + ? $el.find('FirstPublicationDate').text() + : $el + .find('OriginalResourceReleaseDate, ResourceReleaseDate') + .first() + .text(), copyrightLine: cline($el), producerCopyrightLine: pline($el), parentalWarningType: toText($el.find('ParentalWarningType')), } - console.log(recording) - const rightsController = $el.find('RightsController').first() if (rightsController.length) { recording.rightsController = { @@ -481,11 +665,15 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { }) function ddexResourceReducer(acc: Record, el: any) { - const [ref, filePath, fileName] = [ - 'ResourceReference', - 'FilePath', - 'FileName', - ].map((k) => $(el).find(k).text()) + const $el = $(el) + const ref = $el.find('ResourceReference').text() + const filePath = isDdex40 + ? $el.find('TechnicalDetails > File > URI').text() + : $el.find('FilePath').text() + const fileName = isDdex40 + ? filePath.split('/').pop() || '' + : $el.find('FileName').text() + acc[ref] = { ref, filePath, fileName } return acc } @@ -500,7 +688,9 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { // parse releases // - const work = $('Release') + const releaseSelector = isDdex40 ? 'ReleaseList > Release' : 'Release' + + const work = $(releaseSelector) .toArray() .map(async (el) => { const $el = $(el) @@ -511,25 +701,33 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { ? deals[0].validityStartDate : undefined - const releaseDate = - validityStartDate || - $el.find('ReleaseDate').text() || - $el.find('GlobalOriginalReleaseDate').text() || - $el.find('OriginalReleaseDate').text() + const releaseDate = isDdex40 + ? (validityStartDate || + $el.find('OriginalReleaseDate').text()) + : (validityStartDate || + $el.find('ReleaseDate').text() || + $el.find('GlobalOriginalReleaseDate').text() || + $el.find('OriginalReleaseDate').text()) const [genre, subGenre] = parseGenres($el) const release: DDEXRelease = { ref, - title: $el.find('ReferenceTitle TitleText, Release TitleText').text(), - subTitle: $el.find('ReferenceTitle SubTitle').text(), + title: isDdex40 + ? $el.find('DisplayTitle > TitleText').text() + : $el.find('ReferenceTitle TitleText, Release TitleText').text(), + subTitle: isDdex40 + ? $el.find('DisplayTitle > SubTitle').text() + : $el.find('ReferenceTitle SubTitle').text(), artists: parseContributor('DisplayArtist', $el), contributors: parseContributor('ResourceContributor', $el), indirectContributors: parseContributor( 'IndirectResourceContributor', $el ), - labelName: $el.find('LabelName').text(), + labelName: isDdex40 + ? (partList[$el.find('ReleaseLabelReference').text()] || '') + : $el.find('LabelName').text(), genre, subGenre, releaseIds: parseReleaseIds($el), @@ -562,13 +760,12 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI) { } // resolve resources + const resourceSelector = isDdex40 + ? 'ResourceGroup ReleaseResourceReference, ResourceGroupContentItem > ReleaseResourceReference' + : 'ReleaseResourceReferenceList > ReleaseResourceReference, ResourceGroup ReleaseResourceReference' + $el - .find( - ` - ReleaseResourceReferenceList > ReleaseResourceReference, - ResourceGroup ReleaseResourceReference - ` - ) + .find(resourceSelector) .each((_, el) => { const ref = $(el).text() From 82866aac2324397264f674d7ef505eb465ca2cfd Mon Sep 17 00:00:00 2001 From: Raymond Jacobson Date: Thu, 3 Jul 2025 15:37:43 -0700 Subject: [PATCH 3/4] try to fix images --- src/parseDelivery.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parseDelivery.ts b/src/parseDelivery.ts index 3b6a294..cf76547 100644 --- a/src/parseDelivery.ts +++ b/src/parseDelivery.ts @@ -761,7 +761,7 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI, isDdex40: // resolve resources const resourceSelector = isDdex40 - ? 'ResourceGroup ReleaseResourceReference, ResourceGroupContentItem > ReleaseResourceReference' + ? 'ResourceGroup ReleaseResourceReference, ResourceGroupContentItem > ReleaseResourceReference, LinkedReleaseResourceReference' : 'ReleaseResourceReferenceList > ReleaseResourceReference, ResourceGroup ReleaseResourceReference' $el From a93dac69dc716c89542ee4a9ea2a5bcee5bffd22 Mon Sep 17 00:00:00 2001 From: Raymond Jacobson Date: Thu, 3 Jul 2025 16:59:22 -0700 Subject: [PATCH 4/4] Get 4.0 support working --- src/parseDelivery.ts | 66 ++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/src/parseDelivery.ts b/src/parseDelivery.ts index cf76547..a50843c 100644 --- a/src/parseDelivery.ts +++ b/src/parseDelivery.ts @@ -198,7 +198,7 @@ export async function parseDdexXml( const isUpdate = $('UpdateIndicator').text() == 'UpdateMessage' // Detect DDEX version - const isDdex40 = rawTagName.includes('ernm:') || xmlText.includes('http://ddex.net/xml/ern/4') + const isDdex40 = xmlText.includes('http://ddex.net/xml/ern/4') // todo: would be nice to skip this on reParse await xmlRepo.upsert({ @@ -256,7 +256,7 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI, isDdex40: const genreText = toText($el.find('Genre > GenreText')) return [genreText || '', ''] } else { - // DDEX 3.8 structure + // DDEX 3.8 structure - original working logic const genres = toTexts($el.find('GenreText')) let subGenres = toTexts($el.find('SubGenre')) if (!subGenres.length) { @@ -302,7 +302,7 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI, isDdex40: }) } } else { - // DDEX 3.8 structure + // DDEX 3.8 structure - original working logic const roleTagName = tagName == 'DisplayArtist' ? 'ArtistRole' : `${tagName}Role` @@ -329,7 +329,7 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI, isDdex40: $('PartyList > Party').each((_, el) => { const $el = $(el) const ref = $el.find('PartyReference').text() - const name = $el.find('PartyName > FullName').text() + const name = $el.find('PartyName').first().find('FullName').text() partList[ref] = name }) } else { @@ -472,7 +472,7 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI, isDdex40: const cmt = $el.find('CommercialModelType') const commercialModelType = cmt.attr('UserDefinedValue') || cmt.text() - const usageTypes = toTexts($el.find('UseType')) + const usageTypes = toTexts($el.find('Usage > UseType')) const territoryCode = toTexts($el.find('TerritoryCode')) const validityStartDate = $el.find('ValidityPeriod > StartDate').text() const validityEndDate = $el.find('ValidityPeriod > EndDate').text() @@ -608,17 +608,29 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI, isDdex40: ? $el.find('SoundRecordingEdition > ResourceId > ISRC').text() : $el.find('ISRC').text(), - filePath: isDdex40 - ? $el.find('SoundRecordingEdition > TechnicalDetails > DeliveryFile > File > URI').text() - : $el.find('FilePath:first').text(), - fileName: isDdex40 - ? $el.find('SoundRecordingEdition > TechnicalDetails > DeliveryFile > File > URI').text().split('/').pop() || '' - : $el.find('FileName:first').text(), + filePath: (() => { + if (isDdex40) { + const fullUri = $el.find('SoundRecordingEdition > TechnicalDetails > DeliveryFile > File > URI').text() + const lastSlashIndex = fullUri.lastIndexOf('/') + return lastSlashIndex !== -1 ? fullUri.substring(0, lastSlashIndex + 1) : '' + } else { + return $el.find('FilePath:first').text() + } + })(), + fileName: (() => { + if (isDdex40) { + const fullUri = $el.find('SoundRecordingEdition > TechnicalDetails > DeliveryFile > File > URI').text() + const lastSlashIndex = fullUri.lastIndexOf('/') + return lastSlashIndex !== -1 ? fullUri.substring(lastSlashIndex + 1) : fullUri + } else { + return $el.find('FileName:first').text() + } + })(), title: isDdex40 - ? $el.find('DisplayTitle > TitleText:first').text() + ? $el.find('DisplayTitle > TitleText').text() : $el.find('TitleText:first').text(), subTitle: isDdex40 - ? $el.find('DisplayTitle > SubTitle:first').text() + ? $el.find('DisplayTitle > SubTitle').text() : $el.find('SubTitle:first').text(), artists: parseContributor('DisplayArtist', $el), contributors: parseContributor('ResourceContributor', $el), @@ -667,12 +679,24 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI, isDdex40: function ddexResourceReducer(acc: Record, el: any) { const $el = $(el) const ref = $el.find('ResourceReference').text() - const filePath = isDdex40 - ? $el.find('TechnicalDetails > File > URI').text() - : $el.find('FilePath').text() - const fileName = isDdex40 - ? filePath.split('/').pop() || '' - : $el.find('FileName').text() + + let filePath: string + let fileName: string + + if (isDdex40) { + const fullUri = $el.find('TechnicalDetails > File > URI').text() + const lastSlashIndex = fullUri.lastIndexOf('/') + if (lastSlashIndex !== -1) { + filePath = fullUri.substring(0, lastSlashIndex + 1) // Include the trailing slash + fileName = fullUri.substring(lastSlashIndex + 1) + } else { + filePath = '' + fileName = fullUri + } + } else { + filePath = $el.find('FilePath').text() + fileName = $el.find('FileName').text() + } acc[ref] = { ref, filePath, fileName } return acc @@ -715,7 +739,7 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI, isDdex40: ref, title: isDdex40 ? $el.find('DisplayTitle > TitleText').text() - : $el.find('ReferenceTitle TitleText, Release TitleText').text(), + : $el.find('ReferenceTitle TitleText').text(), subTitle: isDdex40 ? $el.find('DisplayTitle > SubTitle').text() : $el.find('ReferenceTitle SubTitle').text(), @@ -762,7 +786,7 @@ async function parseReleaseXml(source: string, $: cheerio.CheerioAPI, isDdex40: // resolve resources const resourceSelector = isDdex40 ? 'ResourceGroup ReleaseResourceReference, ResourceGroupContentItem > ReleaseResourceReference, LinkedReleaseResourceReference' - : 'ReleaseResourceReferenceList > ReleaseResourceReference, ResourceGroup ReleaseResourceReference' + : 'ReleaseResourceReferenceList > ReleaseResourceReference' $el .find(resourceSelector)