2015-02-07 19:49:04 +01:00
'use strict' ;
var _ = require ( 'lodash' ) ,
temp = require ( 'temp' ) ,
fs = require ( 'fs' ) ,
2015-02-09 02:42:31 +01:00
exec = require ( 'child_process' ) . exec ,
Lexer = require ( 'lex' ) ;
2015-02-07 19:49:04 +01:00
// Automatically track and cleanup files at exit
temp . track ( ) ;
// Perform a comparison between a and b
2015-04-18 05:12:00 +02:00
// the callback should have parameters (err, result)
2015-02-07 19:49:04 +01:00
module . exports = function ( a , b , asMarkdown , callback ) {
//!!! this nested file-open is not a good pattern
// better would be to use promises and write the two files asynchronously
// open the first file
temp . open ( 'wdiffa-' , function ( err , filea ) {
//handle errors
2015-04-18 05:12:00 +02:00
if ( err )
2015-02-07 19:49:04 +01:00
return callback ( err ) ;
//write the string to the file
fs . write ( filea . fd , a ) ;
//close the file
fs . close ( filea . fd , function ( err ) {
if ( err )
return callback ( err ) ;
//open the second file
temp . open ( 'wdiffa-' , function ( err , fileb ) {
2015-04-18 05:12:00 +02:00
if ( err )
2015-02-07 19:49:04 +01:00
return callback ( err ) ;
2015-04-17 18:59:48 +02:00
//write the string to the file
2015-02-07 19:49:04 +01:00
fs . write ( fileb . fd , b ) ;
//close the file
fs . close ( fileb . fd , function ( err ) {
if ( err )
return callback ( err ) ;
var cmd = "./bin/wdiff " + filea . path + " " + fileb . path ;
exec ( cmd , function ( err , stdout ) {
2015-04-17 18:59:48 +02:00
//console.log(cmd);
//console.log(err);
2015-04-18 05:12:00 +02:00
//console.log(stdout);
if ( err && err . code != 1 && err . code != 0 ) {
2015-02-07 19:49:04 +01:00
return callback ( err ) ;
}
//if no difference was found by wdiff, err.code will be 0
var wdiffSame ;
wdiffSame = ( err && err . code == 0 ) ? true : false ;
var resData = { wdiff : stdout , same : wdiffSame } ;
if ( asMarkdown ) {
//!!! this needs more sophisticated parsing
2015-02-09 02:42:31 +01:00
var markdown = rewriteWdiffMarkdown ( stdout )
2015-04-18 05:12:00 +02:00
2015-02-07 19:49:04 +01:00
resData . wdiff = markdown ;
}
return callback ( null , resData ) ;
} ) ;
} ) ;
} ) ;
} ) ;
} ) ;
}
2015-02-09 02:42:31 +01:00
/ * R e w r i t e s t h e g i v e n w d i f f o u t p u t t o c o r r e c t l y r e n d e r a s m a r k d o w n ,
assuming the source documents were also valid markdown . * /
function rewriteWdiffMarkdown ( source ) {
//initialize a stack for the lexed input
2015-04-18 05:12:00 +02:00
//make it a lodash container, just for kicks
2015-02-09 02:42:31 +01:00
var tokens = _ ( [ ] ) ;
//define tokens
var LDEL = { type : "LDEL" } , RDEL = { type : "RDEL" } , LINS = { type : "LINS" } , RINS = { type : "RINS" } ;
//var STRING = {type: "STRING", value:""};
var RDEL _LINS = { type : "RDEL_LINS" } ;
var NEWLINE = { type : "\n" } ;
2015-04-18 05:12:00 +02:00
var isStringToken = function ( token ) { return token . type == "STRING" ; }
2015-02-09 02:42:31 +01:00
//create a lexer to process the wdiff string
var lexer = new Lexer ( function ( char ) {
//the default rule creates a string on the stack for unmatched characters
2015-04-18 05:12:00 +02:00
//and just adds characters to it as they come in
2015-02-09 02:42:31 +01:00
if ( tokens . size ( ) == 0 || ! isStringToken ( tokens . last ( ) ) )
tokens . push ( { type : "STRING" , value : "" } ) ;
tokens . last ( ) . value += char ;
} ) ;
2015-04-18 05:12:00 +02:00
//rules for the newline character,
2015-02-09 02:42:31 +01:00
//as well as opening and closing (left and right) delete and insert tokens
lexer
. addRule ( /\[-/ , function ( ) {
tokens . push ( LDEL ) ;
} )
. addRule ( /-\]/ , function ( ) {
tokens . push ( RDEL ) ;
} )
. addRule ( /{\+/ , function ( ) {
tokens . push ( LINS ) ;
} )
. addRule ( /\+}/ , function ( ) {
tokens . push ( RINS ) ;
} )
//we have a special rule for joined delete and insert tokens
. addRule ( /-\] {\+/ , function ( ) {
tokens . push ( RDEL _LINS ) ;
} )
. addRule ( /\n/ , function ( ) {
//tokens.push({type:"STRING", value:"\n"})
tokens . push ( NEWLINE ) ;
} )
;
2015-04-18 05:12:00 +02:00
2015-02-09 02:42:31 +01:00
//do the lexing
lexer . setInput ( source ) ;
lexer . lex ( ) ;
//# now we parse and transform the input
//create a stack for the transformed output
var transform = _ ( [ ] ) ;
//set the state variables for the parse
var SSTRING = "string" , SINS = "ins" , SDEL = "del" , SDELINS = "delins" ;
var state = SSTRING ;
2015-04-18 05:12:00 +02:00
//this is the index of the immediately previous delete string in the transform stack
2015-02-09 02:42:31 +01:00
var deleteStartIndex = - 1
2015-04-18 05:12:00 +02:00
//iterate the input tokens to create the intermediate representation
2015-02-09 02:42:31 +01:00
tokens . forEach ( function ( token ) {
//we add string tokens to the transformed stack
if ( isStringToken ( token ) ) {
//add the string with state information
var item = {
string : token . value ,
state : state
} ;
//if this is the DELINS state, we will put the string in the transformed stack in a different order
// the INS string is spliced into place just after the first DEL string
// the point of this is so that the preceeding markdown formatting instructions
// on this line are applied equally to the del and ins strings
// an extra space is inserted between DEL and INS items, for readibility
if ( state == SDELINS ) {
state = SINS ;
item . state = SINS ;
var spaceItem = { string : ' ' , state : SSTRING } ;
transform . splice ( deleteStartIndex + 1 , 0 , item ) ;
transform . splice ( deleteStartIndex + 1 , 0 , spaceItem ) ;
}
else {
transform . push ( item ) ;
}
}
//the various tokens control the transformation mode
if ( token == LDEL ) {
state = SDEL ;
deleteStartIndex = transform . size ( ) ;
}
if ( token == LINS ) {
state = SINS ;
}
if ( token == RDEL || token == RINS ) {
state = SSTRING ;
deleteStartIndex = - 1 ;
}
if ( token == RDEL _LINS ) {
state = SDELINS ;
}
if ( token == NEWLINE ) {
transform . push ( { string : '\n' , state : state } ) ;
}
//ignore newlines (they get added to the output)
} ) ;
// * now emit the output string
var output = "" ;
var newline = true ;
2015-04-18 05:12:00 +02:00
var newlineIndex = - 1 ;
2015-02-09 02:42:31 +01:00
// prefixes are matched as follows:
// ^ - start of line
// ([ \t]*\>)* - blockquotes (possibly nested)
// (
2015-04-18 05:12:00 +02:00
// ([ \t]*#*) - headers
2015-02-09 02:42:31 +01:00
// |([ \t]+[\*\+-]) - unordered lists
// |([ \t]+[0-9]+\.) - numeric lists
// )?
// [ \t]+ - trailing whitespace
2015-04-18 05:12:00 +02:00
//var PREFIX = /^([ \t]*\>)*(([ \t]*#*)|([ \t]*[\*\+-])|([ \t]*[\d]+\.))?[ \t]+/
var PREFIX = /^([ \t]*\>)*(([ \t]*#*)|([ \t]*[\*\+-])|([ \t]*[\d]+\.))?[ \t]*/
//var PREFIX = /^#*/
2015-02-09 02:42:31 +01:00
transform . forEach ( function ( item ) {
//newlines are undecorated
if ( item . string == '\n' ) {
output += '\n' ;
2015-04-18 05:12:00 +02:00
//flag the new line
2015-02-09 02:42:31 +01:00
newline = true ;
2015-04-18 05:12:00 +02:00
//and record the offset in the output string
newlineIndex = output . length ;
2015-02-09 02:42:31 +01:00
return
}
2015-04-18 05:12:00 +02:00
//wrap del strings with tags
if ( item . state == SDEL ) {
output += '<del>' + item . string + '</del>' ;
//del doesn't reset the newline state
}
2015-02-09 02:42:31 +01:00
2015-04-18 05:12:00 +02:00
//ins strings have to be handled a little differently:
//if this is an ins just after a newline, or after a del after a newline, we need to peel off any markdown formatting prefixes and insert them at the beginning of the line outside the del/ins tags
else if ( item . state == SINS && newline ) {
var prestring , poststring ;
2015-02-09 02:42:31 +01:00
var match = item . string . match ( PREFIX ) ;
2015-04-18 05:12:00 +02:00
if ( match == null )
2015-02-09 02:42:31 +01:00
prestring = "" ;
else
prestring = match [ 0 ] ;
poststring = item . string . substring ( prestring . length ) ;
2015-04-18 05:12:00 +02:00
output = output . substring ( 0 , newlineIndex ) + prestring + output . substring ( newlineIndex ) ;
output += '<ins>' + poststring + '</ins>' ;
newline = false ;
newlineIndex = - 1 ;
2015-02-09 02:42:31 +01:00
}
2015-04-18 05:12:00 +02:00
else if ( item . state == SINS ) {
output += '<ins>' + item . string + '</ins>' ;
}
2015-02-09 02:42:31 +01:00
//and just output other strings
2015-04-18 05:12:00 +02:00
else {
output += item . string ;
//this resets the newline state
newline = false ;
newlineIndex = - 1 ;
}
2015-02-09 02:42:31 +01:00
} ) ;
2015-04-18 05:12:00 +02:00
2015-02-09 02:42:31 +01:00
return output ;
2015-04-17 18:59:48 +02:00
}