/**
 * Converts one page object of a MediaWiki `prop=imageinfo` response into the
 * image record used by this service.
 *
 * @param {object} page - A single entry of `query.pages`; read fields are
 *   `pageid`, `title` and `imageinfo[0]` (`url`, `thumburl`, `user`,
 *   `descriptionurl`, `extmetadata`).
 * @param {string} source - Label stored verbatim in the record's `source` field.
 * @returns {object|null} The image record, or `null` when the page carries no
 *   image info (missing page, or `imageinfo` absent/empty).
 */
function getImageFromPage(page, source) {
  const info = page && Array.isArray(page.imageinfo) ? page.imageinfo[0] : undefined;
  if (!info) {
    return null;
  }

  // extmetadata is only present when the request asked for it; treat it as optional.
  const extMetadata = info.extmetadata || {};

  const image = {
    id: page.pageid,
    source: source,
    imageURL: info.url,
    thumbURL: info.thumburl,
    title: [],
    creators: [],
    uploader: info.user,
    institutions: [],
    infoURL: info.descriptionurl,
    location: null,
    geoLocations: [],
    year: null,
    license: '',
    license_link: null,
    description: [],
    datecreated: [],
    downloadURL: info.url,
  };

  // Strip the "File:" prefix and the trailing file extension from the page title.
  image.title.push(page.title.replace('File:', '').replace(/\.[^/.]+$/, ''));

  if (extMetadata.GPSLatitude && extMetadata.GPSLongitude) {
    // WKT points are written as "longitude latitude".
    image.geoLocations.push(`POINT(${extMetadata.GPSLongitude.value} ${extMetadata.GPSLatitude.value})`);
  }

  if (extMetadata.DateTimeOriginal) {
    const dateString = extMetadata.DateTimeOriginal.value;
    image.datecreated.push(dateString);
    const year = Number.parseInt(String(dateString).slice(0, 4), 10);
    // `year !== NaN` is always true; Number.isNaN is the only reliable check.
    if (!Number.isNaN(year)) {
      image.year = year;
    }
  }

  if (extMetadata.Artist) {
    image.creators.push(extMetadata.Artist.value);
  }
  if (extMetadata.ImageDescription) {
    image.description.push(extMetadata.ImageDescription.value);
  }
  if (extMetadata.LicenseShortName) {
    image.license = extMetadata.LicenseShortName.value;
  }
  if (extMetadata.LicenseUrl) {
    image.license_link = extMetadata.LicenseUrl.value;
  }

  return image;
}

/**
 * Fetches up to 30 images, either the file members of a category or the
 * results of a search in namespace 6, using the request described by the
 * module-level `requestConfigTemplate` (defined outside this block).
 *
 * @param {string} topic - Search text, used only when `commonsCategory` is undefined.
 * @param {string} [commonsCategory] - Category name without the "Category:" prefix.
 * @returns {Promise<object[]>} Image records; empty when the response has no pages.
 *   Pages without image info are left out.
 */
async function getImagesFromCommonsWithTitle(topic, commonsCategory) {
  const generatorParams = commonsCategory !== undefined
    ? {
      generator: 'categorymembers',
      gcmtype: 'file',
      gcmtitle: 'Category:' + commonsCategory,
      gcmlimit: 30,
    }
    : {
      generator: 'search',
      gsrsearch: topic,
      gsrlimit: 30,
      gsrnamespace: 6,
    };

  // Copy the shared template instead of mutating it, so the generator params
  // of one call never leak into the next one.
  const requestConfig = Object.assign({}, requestConfigTemplate, {
    params: Object.assign({}, requestConfigTemplate.params, generatorParams),
  });

  const response = await axios.request(requestConfig);

  const pages = response.data && response.data.query && response.data.query.pages;
  if (!pages) {
    return [];
  }

  return Object.keys(pages)
    .map((key) => getImageFromPage(pages[key], 'Wikimedia Commons'))
    .filter((image) => image !== null);
}
}; async function findWikidataItemFromWikipedia(language, topic) { @@ -38,6 +40,60 @@ async function findWikidataItemFromWikipedia(language, topic) { return null; } +// input the title of an image, returns the first image item with metadata in the api results +async function getImageInfoFromWikipedia(language, titles) { + + titleString = decodeURIComponent(titles.join("|")); + var requestConfig = { + baseURL: "https://" + language + ".wikipedia.org/w/api.php", + method: "get", + responseType: "json", + headers: { + 'Api-User-Agent': process.env.WIKIDOCUMENTARIES_API_USER_AGENT + }, + params: { + action: "query", + prop: "imageinfo", + titles: titleString, + format: "json", + iiprop: "url|extmetadata", + iiextmetadatalanguage: language, + } + }; + const response = await axios.request(requestConfig); + if (response.data) { + let titleChanges = response.data.query.normalized; + const titleChangesMap = new Map(); + for (var titleChange of titleChanges){ + titleChangesMap.set(titleChange.to, titleChange.from); + } + const keys = [Object.keys(response.data.query.pages)][0]; + const pages = response.data.query.pages; + const orderedPages = Array(titles.length); + const decodeTitle = []; + for (var title of titles){ + title = decodeURIComponent(title); + decodeTitle.push(title); + } + for (var key of keys){ + let currImgTitle = pages[key]["title"]; + if (titleChangesMap.has(currImgTitle)){ + currImgTitle = titleChangesMap.get(currImgTitle); + } + let index = decodeTitle.indexOf(currImgTitle); + + orderedPages[index] = pages[key]; + + } + const images = []; + for (var page of orderedPages){ + images.push(getImageFromPage(page, 'Wikipedia')); + } + return images; + } + return null; +} + async function getWikipediaData(language, topic) { const encodedLanguage = language && encodeURIComponent(language);