#! /usr/bin/env python

#---------------------------------------------------------------------
# Darcs Reannotater
#
# The annotation output produced by Darcs is difficult to read.  This
# program converts the Darcs annotation output to something similar to
# CVS's annotation output.
# 
# Send feedback to <kannan@cakoose.com>.
#
#---------------------------------------------------------------------
# Usage
#
# This program processes the output of "darcs annotate".
#
# Using Darcs output directly:
#    darcs annotate File.txt | ./darcs-reannotate
#
# Reading from files:
#    darcs annotate File1.txt > File1.ann
#    darcs annotate File2.txt > File2.ann
#    ./darcs-reannotate File1.ann File2.ann
#
# Output always goes to stdout.
#
#---------------------------------------------------------------------
# What It Does
#
# The program reads in all the lines and maintains a set of all the
# involved patches.  It then creates a unique "nickname" for each
# patch.  Ideally, the nickname is short and meaningful.  This is done
# in the "create_patch_nicknames(...)" function.
#
# Then, all the patches are printed out along with their associated
# nicknames.  This is followed by contents of the revision-controlled
# file, with each line prefixed with the patch nickname.
#
#---------------------------------------------------------------------
# Change Log
#
# [2005 Mar 22]
# - Parse the patch date so that we can reformat it.  Contributed by
#   Sean Perry.
# - Output "^" instead of blanks for lines that belong to the same
#   patch as the previous line.  Suggested by Mark Stossberg.
#
#---------------------------------------------------------------------
# Known Issues
#
# The date parsing library routine doesn't seem to recognize many time
# zones (at least, when run on my machine).
#
# Doesn't do much error checking but it isn't intended to be a robust
# tool.  The original purpose was to make it easy to try out different
# ideas for a better "darcs annotate" format.
#
#---------------------------------------------------------------------

import sys

# Write the usage text to 'out'.  'prog_name' is substituted into the
# two example command lines that invoke this program.
#
# prog_name :: String
# out       :: output stream (anything with a write() method)
def print_usage_info(prog_name, out):
   usage_text = """
This program processes the output of "darcs annotate".

Using darcs output directly:
   darcs annotate File.txt | %s

Reading from files:
   darcs annotate File1.txt > File1.ann
   darcs annotate File2.txt > File2.ann
   %s File1.ann File2.ann

Output always goes to stdout.

""" % (prog_name, prog_name)
   out.write(usage_text)

# Command-line entry point.
#
# prog_name :: String    -- program name shown in the usage text
# args      :: [String]  -- command-line arguments, program name excluded
#
# With '-h' or '--help' among the arguments, prints the usage text and
# returns.  With no arguments, converts stdin.  Otherwise every argument
# is opened as a file and converted in turn.  Output always goes to
# stdout.
#
# Raises AssertionError if one of the named files can't be opened.
def main(prog_name='darcs-reannotate', args=[]):
   # (The shared default list is only ever read, never modified.)

   # Quick check for '-h' or '--help'
   for arg in args:
      if arg == '-h' or arg == '--help':
         print_usage_info(prog_name, sys.stdout)
         return
      elif arg == '-' or arg == '--':
         # NOTE(review): this only stops the search for a help flag; the
         # '-' or '--' argument itself is still treated as a file name
         # below.  Confirm whether that is intended.
         break

   if len(args) == 0:
      # Read from stdin
      convert(sys.stdin, sys.stdout)
      return

   # Read command line args
   for file_name in args:
      try:
         in_file = open(file_name)
      except IOError as err:
         raise AssertionError("Couldn't open %s for reading: %s" % (file_name, err.strerror))
      # Close the file even if the conversion fails part-way through.
      try:
         convert(in_file, sys.stdout)
      finally:
         in_file.close()

# Identity and description of a single patch.
#
# Struct with 4 fields
#   author :: String
#   date   :: String
#   short  :: String
#   long   :: [String]   (defaults to a new empty list)
#
# Two PatchInfo objects compare equal, and hash alike, when their
# author, date and short fields match.  'long' is ignored for both.
class PatchInfo(object):
   def __init__(self, author, date, short, long=None):
      # Give every instance its own list.  A literal "long=[]" default
      # would be one list shared by all patches created without a long
      # description.
      if long is None:
         long = []
      assert isinstance(author, str)
      assert isinstance(short, str)
      assert isinstance(long, list)
      self.author = author
      self.date = date
      self.short = short
      self.long = long
      # Hash the identity fields as a tuple.  XOR-ing the individual
      # hashes cancels out whenever two of the fields are equal.
      self.saved_hash = hash((author, date, short))
         # Not sure if Darcs considers 'long' part of the patch identity.
   def __hash__(self):
      return self.saved_hash
   def __eq__(self, other):
      # Anything that isn't a PatchInfo (None included) is never equal.
      if type(self) != type(other): return False
      return \
         (self.author == other.author) and \
         (self.date == other.date) and \
         (self.short == other.short)
   def __ne__(self, other): return not self.__eq__(other)

# time.strptime() formats that pretty_date() tries, in this order, when
# parsing a patch date.
INPUT_TIME_FORMATS = [
   "%Y%m%d%H%M%S",
   "%a %b %d %H:%M:%S %Z %Y",

   # For some reason, many time zones aren't recognized when this
   # program is run on my machine.  This is a hack to have those time
   # zones included, even though the proper time zone conversion won't
   # take place.
   "%a %b %d %H:%M:%S EST %Y",
   "%a %b %d %H:%M:%S EDT %Y",
   "%a %b %d %H:%M:%S CEST %Y",
]

# Reformat a patch date for display.
#
# Every pattern in INPUT_TIME_FORMATS is tried in turn; the first one
# that works is used to render the date as "%x %X".  A date that none
# of the patterns can handle is handed back unchanged.
#
# date_string :: String
# :: String
def pretty_date(date_string):
   import time
   for candidate_format in INPUT_TIME_FORMATS:
      try:
         return time.strftime("%x %X", time.strptime(date_string, candidate_format))
      except ValueError:
         # This pattern didn't work for the date; try the next one.
         continue

   # Nothing matched, so fall back to the raw input.
   return date_string

# Convert one raw "darcs annotate" dump into the friendly format.
#
# fin: input stream with Darcs' raw annotation output
# fout: output stream for friendly annotation output
#
# The output has two sections.  "-- Patches --" lists every patch with
# its nickname, author, date, short name and long description.
# "-- File Contents --" repeats each line of the file, prefixed with
# the nickname of its patch, or with "^" when the line belongs to the
# same patch as the line before it.
#
# Raises AssertionError if the input doesn't begin with a patch entry.
def convert(fin, fout):
   seen_patches = set()  # :: Set PatchInfo
   patches = []          # :: [PatchInfo], in order of first appearance
   lines = []            # :: [(Line,PatchInfo)]

   # Read and ignore file-creation patch
   file_creation_patch = read_patch_info(fin)
   assert file_creation_patch is not None

   # Read in all the lines.
   while True:
      patch_info = read_patch_info(fin)
      if patch_info is None: break
      line = fin.readline()
      if patch_info not in seen_patches:
         seen_patches.add(patch_info)
         patches.append(patch_info)
      lines.append((line, patch_info))

   # :: {PatchInfo : String}
   # The patches are handed over in order of first appearance so that
   # nickname numbering is the same on every run; iterating a set of
   # PatchInfo would follow hash order instead.
   patch_nicknames = create_patch_nicknames(patches)

   fout.write("-- Patches --\n\n")

   # Dump (Nickname -> Patch) mapping
   longest_nickname = 1
   for patch_info in patches:
      nickname = patch_nicknames[patch_info]
      fout.write(nickname + ": " + patch_info.author + ", " + pretty_date(patch_info.date) + "\n")
      fout.write(" * " + patch_info.short + '\n')
      for long_line in patch_info.long:
         # Entries may or may not carry their own newline, so normalize
         # to exactly one per line.
         fout.write(' ' + long_line.rstrip('\n') + '\n')
      fout.write('\n')
      longest_nickname = max(longest_nickname, len(nickname))

   # Same width as the widest "nickname:" prefix, so the file text
   # stays in one column.
   same_patch_marker = ' ' * (longest_nickname - 1) + '^ '

   fout.write("-- File Contents --\n\n")

   # Dump file content
   prev_patch_info = None
   for (line, patch_info) in lines:
      if patch_info == prev_patch_info:
         # Coalesce
         fout.write(same_patch_marker)
      else:
         prev_patch_info = patch_info
         nickname = patch_nicknames[patch_info]
         # Right-align the nickname against the widest one.
         fout.write(' ' * (longest_nickname - len(nickname)))
         fout.write(nickname)
         fout.write(':')
      fout.write(line)

# Try and create unique, easy-to-read names for each patch.  This function
# should try as hard as it can to create really good names.  Right now, it
# just uses a couple tokens from the 'author' field and appends a number to
# disambiguate.
#
# The prefix is the first token of the author plus the first letter of
# the second token (if any), cut to 10 characters and with trailing
# digits removed.  The first patch to use a prefix gets it as is; later
# patches with the same prefix (compared case-insensitively) get "2",
# "3", ... appended.  Only the 'author' attribute of each patch is read.
#
# patches :: Set PatchInfo
# :: {PatchInfo : String}
def create_patch_nicknames(patches):
   import re

   patch_nicknames = {}
   nickname_counters = {} # :: {String : Integer}, keyed by lower-case prefix

   # Maximum number of chars in prefix.  The appended digits could make
   # the nickname slightly longer, though.
   char_limit = 10

   # '+' rather than '*': a pattern that can match the empty string makes
   # re.split() cut between every single character on newer Pythons.
   splitter = re.compile(r'[ .<>@]+')

   # The empty prefix counts as already used, so a patch without any
   # usable author text still ends up with a visible, numeric nickname.
   nickname_counters[""] = 1

   for patch_info in patches:
      # Drop the empty strings that split() yields when the author
      # starts or ends with a separator (e.g. a trailing '>').
      parts = [part for part in splitter.split(patch_info.author) if part]

      if len(parts) == 0:
         prefix = ""
      elif len(parts) == 1:
         prefix = parts[0]
      else:
         # Use first component plus first letter of second component
         prefix = parts[0] + parts[1][0]

      # Limit prefix length
      prefix = prefix[0:char_limit]

      # Strip numbers from the end of the nickname, so the counter
      # appended below can't be mistaken for part of the name.  The
      # prefix may become empty here.
      while prefix and prefix[-1].isdigit():
         prefix = prefix[0:-1]

      lcase_prefix = prefix.lower()

      count = nickname_counters.get(lcase_prefix, 0) + 1
      nickname_counters[lcase_prefix] = count
      if count == 1:
         # We're the first patch to use this nickname
         suffix = ""
      else:
         # Some other patch is already using this nickname.
         suffix = str(count)

      patch_nicknames[patch_info] = prefix + suffix

   return patch_nicknames

# Fetch the next line from 'fin' without its trailing newline.  A line
# that doesn't end in a newline is returned as is.
def read_line(fin):
   raw = fin.readline()
   if raw[-1:] == '\n':
      return raw[:-1]
   return raw

# Read a single patch info entry in Darcs' raw annotation output
# Return a PatchInfo object or None if there's nothing to be read.
#
# What gets parsed:
#   line 1:  everything after the first '[' is the short patch name
#   line 2:  characters from index 2 up to '**' are the author, and the
#            rest is the date; a ']' in the date ends the entry
#   otherwise the following lines, up to one that starts with ']' (or
#   the end of the input), are the long description.  The first
#   character of each of those lines is dropped and the trailing
#   newline is kept.
#
# Returns None when the first line is empty or has no '['.
# Raises ValueError when the second line has no '**'.
#
# :: Maybe PatchInfo
def read_patch_info(fin):
   # Short patch name
   line = read_line(fin)
   open_bracket_pos = line.find('[')
   if open_bracket_pos < 0:
      return None  # blank line, end of file, or not a patch entry
   short = line[open_bracket_pos+1:]

   # Author, Date
   line = read_line(fin)
   star_star_pos = line.index('**')
   author = line[2:star_star_pos]
   date = line[star_star_pos+2:]

   # Long comment
   long_lines = []
   close_bracket_pos = date.find(']')
   if close_bracket_pos >= 0:
      # If there is a close bracket, then there's no long description.
      # Just strip off the trailing ']'
      date = date[:close_bracket_pos]
   else:
      # Parse long description.  readline() is used for every line so
      # that all entries keep their newline, and so that end of input
      # ('') can be told apart from a blank line ('\n').
      while True:
         line = fin.readline()
         if line == '' or line.startswith(']'):
            # Closing bracket, or the input ended before one showed up.
            break
         if line != '\n':
            line = line[1:]  # Strip leading space
         long_lines.append(line)

   return PatchInfo(author, date, short, long_lines)

# Run as a script: argv[0] is passed as the program name (used in the
# usage text) and the remaining arguments are the inputs to convert.
if __name__ == "__main__":
   main(sys.argv[0], sys.argv[1:])


