dubdiff/server/components/wdiff/index.js
2016-11-03 19:16:54 -04:00

323 lines
8.7 KiB
JavaScript

'use strict';
var _ = require('lodash'),
temp = require('temp'),
fs = require('fs'),
exec = require('child_process').exec,
Lexer = require('lex');
var diff = require('diff');
// Automatically track and cleanup files at exit
temp.track();
// Perform a comparison between a and b
// the callback should have parameters (err, result)
module.exports = jsdiffEngine
/**
 * Compare two texts word-by-word with jsdiff and report the result in
 * wdiff notation: removed runs are wrapped in "[-" "-]" and added runs
 * in "{+" "+}".
 *
 * @param {string} a - the original text
 * @param {string} b - the revised text
 * @param {boolean} asMarkdown - when truthy, the result also carries a
 *   `wdiff` property holding the output of rewriteWdiffMarkdown
 * @param {function} callback - invoked as (err, result), where result is
 *   {wdiffNoMarkdown: string, same: boolean, wdiff?: string}
 * @returns whatever the callback returns
 */
function jsdiffEngine (a, b, asMarkdown, callback) {
  var resData;
  try {
    //marker strings occurring in the inputs are escaped first, so they
    //cannot be mistaken for the markers generated below
    var escapedA = escapeString(a);
    var escapedB = escapeString(b);

    var diffRes = diff.diffWordsWithSpace(escapedA, escapedB, {ignoreWhitespace:true});

    var diffStr = diffRes.map(part => {
      if (part.added) return "{+" + part.value + "+}";
      if (part.removed) return "[-" + part.value + "-]";
      return part.value;
    }).join("");

    //the texts are the same exactly when no part was added or removed
    var hasChanges = diffRes.some(part => Boolean(part.added || part.removed));

    resData = {wdiffNoMarkdown: unescapeString(diffStr), same: !hasChanges};

    if (asMarkdown) {
      //!!! this needs more sophisticated parsing
      resData.wdiff = unescapeString(rewriteWdiffMarkdown(diffStr));
    }
  } catch (err) {
    //report failures through the callback, as its (err, result) contract promises
    return callback(err);
  }

  //called outside the try block so an exception thrown by the callback
  //itself is never fed back into it as an error
  return callback(null, resData);
}
/**
 * Compare two texts by writing each to a temporary file and running the
 * external `wdiff` command on the pair.
 *
 * @param {string} a - the original text
 * @param {string} b - the revised text
 * @param {boolean} asMarkdown - when truthy, the result also carries a
 *   `wdiff` property holding the output of rewriteWdiffMarkdown
 * @param {function} callback - invoked as (err, result), where result is
 *   {wdiffNoMarkdown: string, same: boolean, wdiff?: string}
 */
function wdiffEngine (a, b, asMarkdown, callback) {
  //marker strings occurring in the inputs are escaped first, so they
  //cannot be mistaken for the markers in the wdiff output
  var escapedA = escapeString(a);
  var escapedB = escapeString(b);

  writeWdiffTempFile('wdiffa-', escapedA, function (err, pathA) {
    if (err)
      return callback(err);

    writeWdiffTempFile('wdiffb-', escapedB, function (err, pathB) {
      if (err)
        return callback(err);

      //the paths are quoted so a temp directory containing spaces still works
      var cmd = 'wdiff "' + pathA + '" "' + pathB + '"';

      exec(cmd, function (err, stdout) {
        //exec reports a non-zero exit status as an error carrying err.code;
        //status 1 means "differences found" and is not a failure
        if (err && err.code !== 1)
          return callback(err);

        //a clean exit (no error object at all) means no difference was found
        var resData = {wdiffNoMarkdown: unescapeString(stdout), same: !err};

        if (asMarkdown) {
          //!!! this needs more sophisticated parsing
          resData.wdiff = unescapeString(rewriteWdiffMarkdown(stdout));
        }

        return callback(null, resData);
      });
    });
  });
}

// Creates a temp file with the given name prefix, writes `contents` to it,
// closes it, and then calls done(err, path).
function writeWdiffTempFile (prefix, contents, done) {
  temp.open(prefix, function (err, info) {
    if (err)
      return done(err);

    //wait for the write to finish before closing the descriptor
    fs.write(info.fd, contents, function (writeErr) {
      //always close the descriptor, even when the write failed
      fs.close(info.fd, function (closeErr) {
        if (writeErr || closeErr)
          return done(writeErr || closeErr);
        return done(null, info.path);
      });
    });
  });
}
/**
 * Rewrites the given wdiff output to correctly render as markdown,
 * assuming the source documents were also valid markdown.
 *
 * The rewrite runs in three passes:
 *  1. lex the input into string, newline and delete/insert marker tokens;
 *  2. turn the tokens into a list of {string, state} items, moving the
 *     insert half of a joined "-] {+" pair next to the start of its delete;
 *  3. emit the items, wrapping deletions in <del> and insertions in <ins>,
 *     and hoisting any markdown line prefix (blockquote, header, list
 *     marker) found at the start of a line-leading insertion out in front
 *     of the tags.
 *
 * @param {string} source - text using the wdiff markers "[-", "-]", "{+", "+}"
 * @returns {string} the text with the markers replaced by <del>/<ins> tags
 */
function rewriteWdiffMarkdown(source) {
  //the lexed input is kept in a plain array: the passes below rely on
  //push/splice taking effect in place, immediately
  var tokens = [];

  //define tokens; the marker tokens are singletons compared by identity below
  var LDEL = {type:"LDEL"}, RDEL = {type:"RDEL"}, LINS = {type:"LINS"}, RINS = {type:"RINS"};
  var RDEL_LINS = {type:"RDEL_LINS"};
  var NEWLINE = {type:"\n"};

  var isStringToken = function (token) { return token.type == "STRING"; };

  //create a lexer to process the wdiff string
  var lexer = new Lexer(function (char) {
    //the default rule creates a string on the stack for unmatched characters
    //and just adds characters to it as they come in
    var last = tokens[tokens.length - 1];
    if (tokens.length == 0 || !isStringToken(last)) {
      last = {type: "STRING", value: ""};
      tokens.push(last);
    }
    last.value += char;
  });

  //rules for the newline character,
  //as well as opening and closing (left and right) delete and insert tokens
  lexer
    .addRule(/\[-/, function () {
      tokens.push(LDEL);
    })
    .addRule(/-\]/, function () {
      tokens.push(RDEL);
    })
    .addRule(/{\+/, function () {
      tokens.push(LINS);
    })
    .addRule(/\+}/, function () {
      tokens.push(RINS);
    })
    //we have a special rule for joined delete and insert tokens
    .addRule(/-\] {\+/, function() {
      tokens.push(RDEL_LINS);
    })
    .addRule(/\n/, function () {
      tokens.push(NEWLINE);
    })
  ;

  //do the lexing
  lexer.setInput(source);
  lexer.lex();

  //# now we parse and transform the input

  //create a stack for the transformed output
  var transform = [];

  //set the state variables for the parse
  var SSTRING = "string", SINS = "ins", SDEL = "del", SDELINS = "delins";
  var state = SSTRING;

  //this is the index of the immediately previous delete string in the transform stack
  var deleteStartIndex = -1;

  //iterate the input tokens to create the intermediate representation
  tokens.forEach(function (token) {
    //we add string tokens to the transformed stack
    if (isStringToken(token)) {
      //add the string with state information
      var item = {
        string: token.value,
        state: state
      };

      //if this is the DELINS state, we will put the string in the transformed stack in a different order
      // the INS string is spliced into place just after the first DEL string
      // the point of this is so that the preceding markdown formatting instructions
      // on this line are applied equally to the del and ins strings
      // an extra space is inserted between DEL and INS items, for readability
      if (state == SDELINS) {
        state = SINS;
        item.state = SINS;
        var spaceItem = {string: ' ', state: SSTRING};
        //two separate splices at the same index: the space ends up in front of the INS item
        transform.splice(deleteStartIndex + 1, 0, item);
        transform.splice(deleteStartIndex + 1, 0, spaceItem);
      }
      else {
        transform.push(item);
      }
    }

    //the various tokens control the transformation mode
    if (token == LDEL) {
      state = SDEL;
      //the next item pushed will be the first string of this deletion
      deleteStartIndex = transform.length;
    }
    if (token == LINS) {
      state = SINS;
    }
    if (token == RDEL || token == RINS) {
      state = SSTRING;
      deleteStartIndex = -1;
    }
    if (token == RDEL_LINS) {
      state = SDELINS;
    }

    //newlines are carried over as items of their own, tagged with the current state
    if (token == NEWLINE) {
      transform.push({string: '\n', state: state});
    }
  });

  // * now emit the output string
  var output = "";
  //true while nothing but deletions has been emitted on the current line
  var newline = true;
  //offset in output where the current line starts (-1 when not tracked)
  var newlineIndex = -1;

  // prefixes are matched as follows:
  // ^                     - start of line
  // ([ \t]*\>)*           - blockquotes (possibly nested)
  // (
  //   ([ \t]*#*)          - headers
  //   |([ \t]*[\*\+-])    - unordered lists
  //   |([ \t]*[0-9]+\.)   - numeric lists
  // )?
  // [ \t]*                - trailing whitespace
  var PREFIX = /^([ \t]*\>)*(([ \t]*#*)|([ \t]*[\*\+-])|([ \t]*[\d]+\.))?[ \t]*/

  transform.forEach(function (item) {
    //newlines are undecorated
    if (item.string == '\n') {
      output += '\n';

      //flag the new line
      newline = true;
      //and record the offset in the output string
      newlineIndex = output.length;
      return;
    }

    //wrap del strings with tags
    if (item.state == SDEL) {
      output += '<del>' + item.string + '</del>';
      //del doesn't reset the newline state
    }

    //ins strings have to be handled a little differently:
    //if this is an ins just after a newline, or after a del after a newline,
    //we need to peel off any markdown formatting prefixes and insert them
    //at the beginning of the line outside the del/ins tags
    else if (item.state == SINS && newline) {
      var match = item.string.match(PREFIX);
      var prestring = (match == null) ? "" : match[0];
      var poststring = item.string.substring(prestring.length);

      //substring clamps a newlineIndex of -1 to 0, so on the very first
      //line the prefix lands at the start of the output
      output = output.substring(0, newlineIndex) + prestring + output.substring(newlineIndex);
      output += '<ins>' + poststring + '</ins>';

      newline = false;
      newlineIndex = -1;
    }
    else if (item.state == SINS) {
      output += '<ins>' + item.string + '</ins>';
    }

    //and just output other strings
    else {
      output += item.string;

      //this resets the newline state
      newline = false;
      newlineIndex = -1;
    }
  });

  return output;
}
/**
 * Escapes the four wdiff marker strings ("[-", "-]", "{+" and "+}") in a
 * text by replacing their bracket or brace with a numeric HTML character
 * reference, so that text which happens to contain them is not confused
 * with real diff markers.
 *
 * @param {string} str - the raw text
 * @returns {string} the text with every marker string escaped
 */
function escapeString(str) {
  return str
    .replace(/\[-/g, "&#91;-")
    .replace(/-\]/g, "-&#93;")
    .replace(/\{\+/g, "&#123;+")
    .replace(/\+\}/g, "+&#125;");
}

/**
 * Reverses escapeString: turns the escaped forms back into the literal
 * "[-", "-]", "{+" and "+}" strings.
 *
 * @param {string} str - text containing escaped marker strings
 * @returns {string} the text with the marker strings restored
 */
function unescapeString(str) {
  return str
    .replace(/&#91;-/g, "[-")
    .replace(/-&#93;/g, "-]")
    .replace(/&#123;\+/g, "{+")
    .replace(/\+&#125;/g, "+}");
}