User:Lupin/diff.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/*
 * Javascript Diff Algorithm
 *  By John Resig (http://ejohn.org/) and [[:en:User:Lupin]]
 *
 * More Info:
 *  http://ejohn.org/projects/javascript-diff-algorithm/
 */

function diffEscape(n) {
    return n.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;")
      .replace(RegExp('"','g'), "&quot;");
}
function delFmt(x) { 
  if (!x.length) return ''; 
  return "<del style='background:#FFE6E6;'>" + diffEscape(x.join('')) +"</del>"; 
}
function insFmt(x) { 
  if (!x.length) return '';
  return "<ins style='background:#AAFFEE;'>" + diffEscape(x.join('')) +"</ins>"; 
}

function copyDiffObj(x){
  var ret=[];
  for (var i=0; i<x.length; ++i) {
    if (typeof x[i] == 'string') ret[i]=x[i];
    else {
      ret[i]={};
      for (var prop in x[i]) ret[i][prop]=x[i][prop];
    }
  }
  return ret;
}

function countCrossings(a, b, i) {
  // count the crossings on the edge starting at b[i]
  if (b[i].row==null) return -1;
  var count=0;
  for (var j=0; j<a.length; ++j) {
    if (a[j].row==null) continue;
    if ( (j-b[i].row)*(i-a[j].row) > 0) count++;
  }
  return count;
}

//function debug(s){
//  try {document.getElementById('debug').value+=s+'\n'; } catch(foo) {};
//}

function untangle( a, b) {
  // try to remove crossing edges from an ordered bipartite graph,
  // removing the least number possible
  var aa=copyDiffObj(a);
  var bb=copyDiffObj(b);

  // remove the edge with the largest number of crossings until no
  // crossings remain
  do {
    var maxCrossings=0;
    var worstEdge=null;
    for (var i=0; i<bb.length; ++i) {
      var c=countCrossings(aa,bb,i);
      if (c > maxCrossings) { maxCrossings=c; worstEdge=i; }
    }
    if (worstEdge!=null) {
      aa[ bb[worstEdge].row ] = aa[ bb[worstEdge].row ].text;
      bb[worstEdge] = bb[worstEdge].text;
    }
  } while (maxCrossings > 0);
  return { a: aa, b: bb };
}

window.max=function(a,b){return a<b ? b : a;}
window.min=function(a,b){return a>b ? b : a;}

function shortenDiffString(str, context) {
  var output=[];
  var s=str;
  var m=true;
  while ( true ) {
    var re=RegExp('(<del[\\s\\S]*?</del>|<ins[\\s\\S]*?</ins>)+');
    m=re.exec(s);
    if(!m) break;
    var t=(s.substring(max(0,m.index-context), min(s.length, m.index+m[0].length+context)));
    //alert(s+'\n\n\n'+m[0] +'\n\n'+(m.index-context) + '\n\n'+t); 
    output.push(t);
    s=s.substring(min(s.length, m.index+m[0].length));
  }
  return output;
}

function diffString( o, n ) {
  var out = diff( o.split(/\b/), n.split(/\b/) );
  var str = "";
  var acc=[]; // accumulator for prettier output

  // crossing pairings -- 'A B' vs 'B A' -- cause problems, so let's iron them out 
  //var untangled=untangle(out.o, out.n); <-- too slow!
  //out.o=untangled.a; out.n=untangled.b;
  var maxOutputPair=0;
  for (var i=0; i<out.n.length; ++i) {
    if ( out.n[i].row != null) { 
      if( maxOutputPair > out.n[i].row ) {
        // tangle - delete pairing
        out.o[ out.n[i].row ]=out.o[ out.n[i].row ].text;
        out.n[i]=out.n[i].text;
      }
      if (maxOutputPair < out.n[i].row) maxOutputPair = out.n[i].row;
    }
  }
  

  // output the stuff preceding the first paired old line 
  for (var i=0; i<out.o.length && out.o[i].text == null; ++i) acc.push( out.o[i] );
  str += delFmt(acc); acc=[];
  
  // main loop 
  for ( var i = 0; i < out.n.length; ++i ) {
    // output unpaired new "lines" 
    while ( i < out.n.length && out.n[i].text == null ) acc.push( out.n[i++] ); 
    str += insFmt(acc); acc=[]; 
    if ( i < out.n.length ) { // this new "line" is paired with the (out.n[i].row)th old "line" 
      str += out.n[i].text;
      // output unpaired old rows starting after this new line's partner 
      var m = out.n[i].row + 1; 
      while ( m < out.o.length && out.o[m].text == null ) acc.push ( out.o[m++] );
      str += delFmt(acc); acc=[];
    }
  }
  return str;
}

function diff( o, n ) {
  var ns = new Object();
  var os = new Object();
  
  // pass 1: make hashtable ns with new rows as keys
  for ( var i = 0; i < n.length; i++ ) {
    if ( ns[ n[i] ] == null )
      ns[ n[i] ] = { rows: new Array(), o: null };
    ns[ n[i] ].rows.push( i );
  }
  
  // pass 2: make hashtable os with old rows as keys
  for ( var i = 0; i < o.length; i++ ) {
    if ( os[ o[i] ] == null )
      os[ o[i] ] = { rows: new Array(), n: null };
    os[ o[i] ].rows.push( i );
  }
  
  // pass 3: pair unique new rows and matching unique old rows
  for ( var i in ns ) {
    if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {
      n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };
      o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };
    }
  }
  
  // pass 4: pair matching rows immediately following paired rows (not necessarily unique)
  for ( var i = 0; i < n.length - 1; i++ ) {
    if ( n[i].text != null && n[i+1].text == null && 
         n[i].row < o.length - 1 && o[ n[i].row + 1 ].text == null && 
         n[i+1] == o[ n[i].row + 1 ] ) {
      n[i+1] = { text: n[i+1], row: n[i].row + 1 };
      o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };
    }
  }
  
  // pass 5: pair matching rows immediately preceding paired rows (not necessarily unique)
  for ( var i = n.length - 1; i > 0; i-- ) {
    if ( n[i].text != null && n[i-1].text == null && 
         n[i].row > 0 && o[ n[i].row - 1 ].text == null && 
         n[i-1] == o[ n[i].row - 1 ] ) {
      n[i-1] = { text: n[i-1], row: n[i].row - 1 };
      o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };
    }
  }
  
  return { o: o, n: n };
}