User:TheDJ/datacheck.js
Jump to navigation
Jump to search
Note: After saving, you have to bypass your browser's cache to see the changes. Internet Explorer: press Ctrl-F5, Mozilla: hold down Shift while clicking Reload (or press Ctrl-Shift-R), Opera/Konqueror: press F5, Safari: hold down Shift + Alt while clicking Reload, Chrome: hold down Shift while clicking Reload.
Documentation for this user script can be added at User:TheDJ/datacheck. |
/*
* Purpose: Show as much of the data as we can determine, in as 'raw' a format as possible
* This can help those who are interested in cleaning up metadata to find cases which are currently subpar
*
* Licensed: MIT
*
* TODO:
* - Make it collapsible
* - Add link to place for editors to collaborate on improving
* - Add checks for duplicate statements
* - Add checks for complex HTML
* - Add checks for Geo location/orientation (camera and object)
* - Add checks for FoP
* - Add checks for derivatives (using category)
* - Run checks on the database for templates using the classes that we might not know about
* - Add checks for type and microformats
* - Add checks for campaigns and/or institutional donations
* - Add check for retouched + user
* - Add check for FP/QI/VI
* - Add checks for catalogue numbers/source ids (NASA image id, NARA etc)
*/
( function ( $, mw ) {
'use strict';
// jQuery wrapper of the <div> holding both report sections; assigned by
// initDataCheck() once the DOM is ready and read by the report* functions.
var $metadataView,
// Collected warnings. Nothing in the code visible here appends to or reads it yet.
warnings = [],
// Collected error messages; collectPageMetadata() appends to it.
errors = [];
/**
 * Render the extmetadata of an imageinfo API response as a table inside the
 * '.metadata-api' section of the datacheck view.
 *
 * Only the first page of the response is reported. A response without pages,
 * without imageinfo (e.g. a description page that has no file) or without
 * extmetadata yields an empty table instead of throwing.
 *
 * @param {Object} data Response of the action=query / prop=imageinfo request
 */
function reportAPIMetadata( data ) {
	// Wait for DOM ready: $metadataView is created in a DOM-ready handler as well.
	$( function () {
		var pages = data && data.query && data.query.pages,
			table = $( '<table>' ).addClass( 'mw_metadata datacheck' ),
			page,
			imageinfo,
			extmetadata;

		/**
		 * Build one <tr> for a metadata field.
		 *
		 * @param {string} header Name of the extmetadata field
		 * @param {*} originalValue Field value; an object with _type 'lang'
		 *  is expanded to one "code: text" line per language
		 * @return {jQuery} The table row
		 */
		function buildRow( header, originalValue ) {
			var value = originalValue;
			if ( value && value._type === 'lang' ) {
				value = '';
				$.each( originalValue, function ( langCode, description ) {
					// '_type' is the marker key, not a language
					if ( langCode !== '_type' ) {
						value += langCode + ': ' + description + '<br />';
					}
				} );
			}
			if ( value === undefined || value === null ) {
				value = '';
			}
			return $( '<tr>' )
				// Field names are plain text; same treatment as in reportScrapedData()
				.append( $( '<th>' ).text( header ) )
				// Values are inserted as markup, as before (they are joined with <br /> above)
				.append( $( '<td>' ).html( value ) );
		}

		if ( pages ) {
			for ( page in pages ) {
				if ( Object.prototype.hasOwnProperty.call( pages, page ) ) {
					imageinfo = pages[ page ].imageinfo;
					extmetadata = imageinfo && imageinfo[ 0 ] && imageinfo[ 0 ].extmetadata;
					break; // Just one page
				}
			}
		}

		$.each( extmetadata || {}, function ( key, entry ) {
			table.append( buildRow( key, entry && entry.value ) );
		} );

		table.appendTo( $metadataView.find( '.metadata-api' ) );
	} );
}
/**
 * Query the imageinfo API for the current page and hand the response to
 * reportAPIMetadata() once the request succeeds.
 */
function getAPIMetadata() {
	// extmetadata fields to request.
	// Deliberately left out for now: DateTime (datetime from EXIF file data),
	// GPSLatitude, GPSLongitude, Categories, Permission.
	var wantedFields = [
			'ObjectName', // title of a book for instance
			'DateTimeOriginal', // date time from desc page
			'ImageDescription', // from desc page
			'Copyrighted', // from desc page, false if PD
			'License', // from template
			'LicenseShortName', // from desc page
			'UsageTerms', // 'long name of terms' from desc page
			'LicenseUrl', // link to license deed
			'Credit', // source? from desc page
			'Artist' // author/copyright holder from desc page
		],
		client = new mw.Api(),
		request = {
			action: 'query',
			titles: mw.config.get( 'wgPageName' ),
			prop: 'imageinfo',
			iiprop: 'timestamp|user|url|size|mime|mediatype|extmetadata',
			iiextmetadatalanguage: mw.config.get( 'wgUserLanguage' ),
			iimetadataversion: 'latest',
			// sent with an empty value; presumably a flag-style parameter - confirm against the API help
			iiextmetadatamultilang: '',
			iiextmetadatafilter: wantedFields.join( '|' )
		};

	client.get( request ).done( reportAPIMetadata );
}
/**
 * Scrape the machine-readable metadata from the rendered file description page.
 *
 * The result holds the plain-text values (description, author, date, source,
 * attribution, ownwork, creator), the matched jQuery sets under `html`, the
 * boolean `restrictions` flags and the list returned by getLicenses().
 * A missing information table is recorded in the `errors` list.
 *
 * @return {Object} The collected metadata
 */
function collectPageMetadata() {
	var html = {},
		// Key order matters: reportScrapedData() lists the fields in insertion order
		result = { html: html, restrictions: {} },
		$localized,
		$isoDate;

	// Check for {{Information}}
	if ( $( '.commons-file-information-table' ).length === 0 ) {
		errors.push( "No {{Information}}-template seems to be present" );
	}

	// From {{Information}}: the description, split per language when marked up as such
	html.description = $( '#fileinfotpl_desc + td' );
	result.description = {};
	$localized = html.description.find( '.description[lang]' );
	if ( $localized.length === 0 ) {
		// TODO: warn about lack of language info
		result.description = scrapeText( html.description );
	} else {
		$localized.each( function ( index, element ) {
			// Work on a copy so the language label can be dropped without touching the page
			var $copy = $( element ).clone();
			$copy.find( '.language' ).remove();
			result.description[ $( element ).attr( 'lang' ) ] = scrapeText( $copy );
		} );
	}

	html.author = $( '#fileinfotpl_aut + td' );
	result.author = scrapeText( html.author );

	// From {{date}}: prefer the datetime attribute of a <time> element over the cell text
	// TODO: register that such a value is ISO, and warn when the date is not
	// ISO or not recognized as such
	html.date = $( '#fileinfotpl_date + td' );
	$isoDate = html.date.find( 'time[datetime]' );
	result.date = $isoDate.length > 0 ?
		$isoDate.attr( 'datetime' ) :
		scrapeText( html.date );

	html.source = $( '#fileinfotpl_src + td' );
	result.source = scrapeText( html.source );

	// From {{Credit line}}, explicit attribution statement, supersedes what we can manufacture ourselves
	html.attribution = $( '.fileinfotpl_credit + td' );
	result.attribution = scrapeText( html.attribution );

	// From {{own}}, usually contained in #fileinfotpl_src
	result.ownwork = $( '#own-work, .int-own-work' ).length > 0;

	// From {{Creator}} which is usually contained in #fileinfotpl_aut
	html.creator = $( '#creator' );
	result.creator = scrapeText( html.creator );

	// {{Personality rights}} and the trademark restriction marker
	result.restrictions.personality_rights = $( '#commons-template-personality-rights' ).length > 0;
	result.restrictions.trademarked = $( '.restriction-trademarked' ).length > 0;

	result.licenses = getLicenses();
	return result;
}
/**
 * Collect the license templates ('.licensetpl' elements) found on the page.
 *
 * Each entry carries the inner HTML of the licensetpl_* fields: link, short,
 * long, attr, aut, link_req and attr_req (undefined when a field is absent).
 * Templates without a non-empty short name are skipped.
 *
 * @return {Object[]} One entry per usable license template, in page order
 */
function getLicenses() {
	var found = [];

	$( '.licensetpl' ).each( function ( index, template ) {
		var $template = $( template ),
			// Inner HTML of the field with the given licensetpl_ suffix
			field = function ( suffix ) {
				return $template.find( '.licensetpl_' + suffix ).html();
			},
			entry = {
				link: field( 'link' ),
				'short': field( 'short' ),
				'long': field( 'long' ),
				attr: field( 'attr' ),
				aut: field( 'aut' ),
				link_req: field( 'link_req' ),
				attr_req: field( 'attr_req' )
			};

		if ( !entry.short ) {
			return;
		}
		found.push( entry );
	} );

	return found;
}
/**
 * Render the metadata scraped from the page into the '.metadata-scraped'
 * section: one table with the plain fields, the license count and the
 * restriction flags, followed by one table listing the licenses.
 *
 * @param {Object} data Metadata object as returned by collectPageMetadata()
 */
function reportScrapedData( data ) {
	var $container = $metadataView.find( '.metadata-scraped' ),
		$table = $( '<table>' ).addClass( 'mw_metadata datacheck' ),
		$licenseTable = $( '<table>' ).addClass( 'mw_metadata datacheck licenses' );

	/**
	 * Build one <tr> for a scraped field.
	 *
	 * Scraped values are plain text, so they are inserted as text nodes.
	 * Inserting them with .html() would turn escaped markup on the page
	 * (e.g. "&lt;img onerror=...&gt;") back into live markup.
	 *
	 * @param {string} header Field name
	 * @param {*} value String, number, boolean, or a plain object whose
	 *  entries are listed as one "key: value" line each
	 * @return {jQuery} The table row
	 */
	function buildRow( header, value ) {
		var $cell = $( '<td>' );
		if ( $.isPlainObject( value ) ) {
			$.each( value, function ( key, text ) {
				$cell.append( document.createTextNode( key + ': ' + text ), $( '<br>' ) );
			} );
		} else if ( value !== undefined && value !== null ) {
			$cell.text( String( value ) );
		}
		return $( '<tr>' ).append( $( '<th>' ).text( header ), $cell );
	}

	/**
	 * Build one <tr> for a license entry.
	 *
	 * The license fields were read with .html() in getLicenses(), so they are
	 * written back as markup. Missing fields are shown as empty instead of
	 * the literal text "undefined".
	 *
	 * @param {Object} license Entry as produced by getLicenses()
	 * @return {jQuery} The table row
	 */
	function buildLicenseRow( license ) {
		// Anything but the exact string "false" counts as required
		var linkNote = license.link_req !== 'false' ?
				' <b>(Required)</b>' :
				' <b>(Not required)</b>',
			attrNote = license.attr_req !== 'false' ?
				'<b>Required</b> ' :
				'<b>Not required</b> ';

		return $( '<tr>' ).append(
			$( '<td>' ).html( license.short || '' ),
			$( '<td>' ).html( license.long || '' ),
			$( '<td>' ).html( ( license.link || '' ) + linkNote ),
			$( '<td>' ).html( attrNote + ( license.attr || '' ) ),
			$( '<td>' ).html( license.aut || '' )
		);
	}

	// Plain fields; the structured members get their own treatment below
	$.each( data, function ( key, value ) {
		if ( key === 'licenses' || key === 'restrictions' || key === 'html' ) {
			return;
		}
		$table.append( buildRow( key, value ) );
	} );
	$table.append( buildRow( 'licenseCount', data.licenses.length ) );
	$.each( data.restrictions, function ( key, value ) {
		$table.append( buildRow( key, value ) );
	} );
	$table.appendTo( $container );

	$licenseTable.append( $( '<tr><th>Short name</th><th>Long name</th><th>License link</th><th>Attribution</th><th>Author</th></tr>' ) );
	$.each( data.licenses, function ( i, license ) {
		$licenseTable.append( buildLicenseRow( license ) );
	} );
	$licenseTable.appendTo( $container );
}
/**
 * Get the trimmed text content of an element, ignoring embedded <style> blocks.
 *
 * The element is cloned first, so the page itself is not modified.
 *
 * @param {jQuery} $element Element(s) to read the text from
 * @return {string} Text content without leading/trailing whitespace
 */
function scrapeText( $element ) {
	var $copy = $element.clone();
	// Inline stylesheets would otherwise end up in the text
	$copy.find( 'style' ).remove();
	// Native trim instead of the deprecated jQuery.trim()
	return $copy.text().trim();
}
/**
 * Set up the datacheck view: add the table styles, start the API request
 * and, once the DOM is ready, insert the container in front of
 * #mw-imagepage-content and fill in the scraped data.
 */
function initDataCheck() {
	mw.util.addCSS( 'table.datacheck { width: auto; } table.datacheck td, table.datacheck th { text-align: left; } table.datacheck th { font-weight:bold;}' );

	// Start the request right away; its result is rendered into the container built below
	getAPIMetadata();

	$( function () {
		var scraped;

		$metadataView = $( '<div>' )
			.addClass( 'metadata-content' )
			.append( '<h2>Metadata API</h2>' )
			.append( '<div class="metadata-api"></div>' )
			.append( '<h2><a href="//commons.wikimedia.org/wiki/Commons:Machine-readable_data">Machine-readable metadata</a></h2>' )
			.append( '<div class="metadata-scraped"></div>' );

		$( '#mw-imagepage-content' ).before( $metadataView );

		scraped = collectPageMetadata();
		reportScrapedData( scraped );
	} );
}
// Only run when viewing an existing page in the File namespace (6), and not on diffs.
if ( mw.config.get( 'wgNamespaceNumber' ) === 6 &&
	mw.config.get( 'wgAction' ) === 'view' &&
	mw.config.get( 'wgArticleId' ) !== 0 )
{
	// mw.util is provided by the 'mediawiki.util' module, so that module has to be
	// loaded before the diff check below and before initDataCheck(), which calls
	// mw.util.addCSS(). Previously mw.util was used without being requested.
	mw.loader.using( [ 'mediawiki.api', 'mediawiki.util' ], function () {
		if ( !mw.util.getParamValue( 'diff' ) ) {
			initDataCheck();
		}
	} );
}
} )( jQuery, mediaWiki );