#!/usr/bin/env ruby
# DocDiff: word/character-oriented text comparison utility
# Copyright (C) 2002-2011 Hisashi MORITA
# Requirements: Ruby (>= 1.8)
#
# Command-line entry point. The script:
#   1. parses command-line options into +clo+,
#   2. layers configuration: built-in defaults, then config files
#      (unless --no-config-file is given), then command-line options,
#   3. reads the two files named on the command line, determines their
#      encoding and end-of-line convention, and
#   4. prints the result of DocDiff#run for the two documents.
require 'docdiff'
require 'docdiff/difference'
require 'docdiff/document'
require 'docdiff/view'
require 'optparse'

# ---- configuration ---------------------------------------------------------

# Built-in defaults; they are the lowest-priority layer of the configuration.
default_config = {
  :resolution => "word",
  :encoding   => "auto",
  :eol        => "auto",
  :format     => "html",
  :cache      => true,
  :digest     => false,
  :verbose    => false
}

# Options given on the command line ("clo" = command line options).
# Only options that were actually specified get a key, so that merging
# +clo+ last overrides exactly what the user asked for.
clo = {}

# If invoked as "worddiff" or "chardiff",
# the appropriate resolution is set respectively.
case File.basename($0, ".*")
when "worddiff" then clo[:resolution] = "word"
when "chardiff" then clo[:resolution] = "char"
end

# ARGV.options yields an OptionParser bound to ARGV. When parsing fails it
# reports the ParseError itself and returns nil, which is turned into exit
# status 1 below.
parsed = ARGV.options do |o|
  o.def_option('--resolution=RESOLUTION',
               possible_resolutions = ['line', 'word', 'char'],
               'specify resolution (granularity)',
               possible_resolutions.join('|') + ' (default is word)'
              ) { |s| clo[:resolution] = (s || "word") }
  o.def_option('--line', 'set resolution to line') { clo[:resolution] = "line" }
  o.def_option('--word', 'set resolution to word') { clo[:resolution] = "word" }
  o.def_option('--char', 'set resolution to char') { clo[:resolution] = "char" }

  o.def_option('--encoding=ENCODING',
               possible_encodings = ['ASCII', 'EUC-JP', 'Shift_JIS', 'UTF-8', 'auto'],
               'specify character encoding',
               possible_encodings.join('|'),
               "(default is auto. " \
               "try ASCII for single byte encodings such as ISO-8859-X)"
              ) { |s| clo[:encoding] = (s || "auto") }
  o.def_option('--ascii', 'same as --encoding=ASCII') { clo[:encoding] = "ASCII" }
  o.def_option('--iso8859x', 'same as --encoding=ASCII') { clo[:encoding] = "ASCII" }
  o.def_option('--eucjp', 'same as --encoding=EUC-JP') { clo[:encoding] = "EUC-JP" }
  o.def_option('--sjis', 'same as --encoding=Shift_JIS') { clo[:encoding] = "Shift_JIS" }
  o.def_option('--utf8', 'same as --encoding=UTF-8') { clo[:encoding] = "UTF-8" }

  o.def_option('--eol=EOL',
               possible_eols = ['CR', 'LF', 'CRLF', 'auto'],
               'specify end-of-line character',
               possible_eols.join('|') + ' (default is auto)'
              ) { |s| clo[:eol] = (s || "auto") }
  o.def_option('--cr', 'same as --eol=CR') { clo[:eol] = "CR" }
  o.def_option('--lf', 'same as --eol=LF') { clo[:eol] = "LF" }
  o.def_option('--crlf', 'same as --eol=CRLF') { clo[:eol] = "CRLF" }

  # The fallback value matches the documented default ("html") and
  # default_config[:format].
  o.def_option('--format=FORMAT',
               possible_formats = ['tty', 'manued', 'html', 'wdiff', 'stat', 'user'],
               'specify output format',
               possible_formats.join('|'),
               "(default is html)",
               '(user tags can be defined in config file)'
              ) { |s| clo[:format] = (s || "html") }
  o.def_option('--tty', 'same as --format=tty') { clo[:format] = "tty" }
  o.def_option('--manued', 'same as --format=manued') { clo[:format] = "manued" }
  o.def_option('--html', 'same as --format=html') { clo[:format] = "html" }
  o.def_option('--wdiff', 'same as --format=wdiff') { clo[:format] = "wdiff" }
  o.def_option('--stat', 'same as --format=stat (not supported yet)') { clo[:format] = "stat" }

  # OptionParser passes one value per occurrence of the option, so the first
  # --label fills :label1 and any later one fills :label2.
  o.def_option('--label LABEL', '-L LABEL',
               'Use label instead of filename (not supported yet)'
              ) do |label|
    if clo.key?(:label1)
      clo[:label2] = label
    else
      clo[:label1] = label
      clo[:label2] = nil
    end
  end

  o.def_option('--digest', 'digest output, do not show all') { clo[:digest] = true }
  o.def_option('--summary', 'same as --digest') { clo[:digest] = true }

  # When --display is repeated, the first occurrence wins (||=).
  o.def_option('--display=DISPLAY',
               possible_types = ['inline', 'multi'],
               'specify presentation type (effective only with digest; experimental feature)',
               possible_types.join('|'),
               '(default is inline)'
              ) { |s| clo[:display] ||= s.downcase }

  o.def_option('--cache', 'use file cache (not supported yet)') { clo[:cache] = true }
  o.def_option('--no-config-file', 'do not read config files') { clo[:no_config_file] = true }
  o.def_option('--verbose', 'run verbosely (not supported yet)') { clo[:verbose] = true }

  o.def_option('--help', 'show this message') { puts o; exit(0) }
  o.def_option('--version', 'show version') { puts DocDiff::AppVersion; exit(0) }
  o.def_option('--license', 'show license') { puts DocDiff::License; exit(0) }
  o.def_option('--author', 'show author(s)') { puts DocDiff::Author; exit(0) }

  o.on_tail("When invoked as worddiff or chardiff, resolution will be set accordingly.",
            "Config files: /etc/docdiff/docdiff.conf, ~/etc/docdiff/docdiff.conf")

  o.parse!
end
exit(1) unless parsed

docdiff = DocDiff.new
docdiff.config.update(default_config)

unless clo[:no_config_file] == true
  # System-wide config file first. The verbose check is repeated after each
  # file because processing a file may itself change docdiff.config[:verbose].
  # The explicit "== true" is kept on purpose: the type of values coming from
  # a config file is not visible here.
  message = docdiff.process_config_file(DocDiff::SystemConfigFileName)
  if clo[:verbose] == true || docdiff.config[:verbose] == true
    STDERR.print message
  end

  # Then at most one of the two per-user config files.
  user_config_exists     = File.exist?(DocDiff::UserConfigFileName)
  alt_user_config_exists = File.exist?(DocDiff::AltUserConfigFileName)
  if user_config_exists && alt_user_config_exists
    raise "#{DocDiff::UserConfigFileName} and #{DocDiff::AltUserConfigFileName} cannot be used at the same time. Remove or rename either one."
  end

  # Keep the per-user message separate from the system one, so that the
  # system message is not printed a second time when no user file exists.
  user_message =
    if user_config_exists
      docdiff.process_config_file(DocDiff::UserConfigFileName)
    elsif alt_user_config_exists
      docdiff.process_config_file(DocDiff::AltUserConfigFileName)
    end
  if user_message && (clo[:verbose] == true || docdiff.config[:verbose] == true)
    STDERR.print user_message
  end
end

# Command-line options have the highest priority.
docdiff.config.update(clo)

# ---- process the documents -------------------------------------------------

# After o.parse! only the non-option arguments remain in ARGV.
raise "Try `#{File.basename($0)} --help' for more information." if ARGV[0].nil?
raise "Specify at least 2 target files." unless ARGV[0] && ARGV[1]
raise "No such file: #{ARGV[0]}." unless FileTest.exist?(ARGV[0])
raise "No such file: #{ARGV[1]}." unless FileTest.exist?(ARGV[1])
raise "#{ARGV[0]} is not a file." unless FileTest.file?(ARGV[0])
raise "#{ARGV[1]} is not a file." unless FileTest.file?(ARGV[1])

file1_content = File.open(ARGV[0], "r") { |f| f.read }
file2_content = File.open(ARGV[1], "r") { |f| f.read }

# Start from the configured values; "auto" is resolved per document below.
encoding1 = docdiff.config[:encoding]
encoding2 = docdiff.config[:encoding]
eol1 = docdiff.config[:eol]
eol2 = docdiff.config[:eol]

if docdiff.config[:encoding] == "auto"
  encoding1 = CharString.guess_encoding(file1_content)
  encoding2 = CharString.guess_encoding(file2_content)
  # Both documents must have a known and identical encoding.
  if encoding1 == "UNKNOWN" || encoding2 == "UNKNOWN"
    raise "Document encoding unknown (#{encoding1}, #{encoding2})."
  elsif encoding1 != encoding2
    raise "Document encoding mismatch (#{encoding1}, #{encoding2})."
  end
end

if docdiff.config[:eol] == "auto"
  eol1 = CharString.guess_eol(file1_content)
  eol2 = CharString.guess_eol(file2_content)
  # Both documents must have a known and identical end-of-line convention.
  if eol1.nil? || eol2.nil?
    raise "Document eol is nil (#{eol1.inspect}, #{eol2.inspect}).  The document might be empty."
  elsif eol1 == 'UNKNOWN' || eol2 == 'UNKNOWN'
    raise "Document eol unknown (#{eol1.inspect}, #{eol2.inspect})."
  elsif eol1 != eol2
    raise "Document eol mismatch (#{eol1}, #{eol2})."
  end
end

doc1 = Document.new(file1_content, encoding1, eol1)
doc2 = Document.new(file2_content, encoding2, eol2)

output = docdiff.run(doc1, doc2,
                     {:resolution => docdiff.config[:resolution],
                      :format     => docdiff.config[:format],
                      :digest     => docdiff.config[:digest],
                      :display    => docdiff.config[:display]})
print output