#!/usr/bin/env python
# $Id: merge_gdd_bg.py,v 1.3 2012/10/21 23:38:56 asdrury Exp $
# http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0017
# Linux:
#  $ export LANG=en_US.UTF-8
#  [perseus 502]$ python --version
#  Python 3.2.3
# Windows:
# > set PYTHONIOENCODING=utf-8

################################################################################################################################
__author__ = "A. S. Drury"
__version__ = "$Revision: 1.3 $"
__date__ = "$Date: 2012/10/21 23:38:56 $"
################################################################################################################################

################################################################################################################################
import sys, re
from lxml import etree
from optparse import OptionParser
################################################################################################################################

################################################################################################################################
# Command-line interface: -a/--afname and -b/--bfname name the two XML input
# files; both have defaults, so the script can be run with no arguments.
parser = OptionParser(
    usage="%prog -a xml -b xml [-h]",
    version="$Id: merge_gdd_bg.py,v 1.3 2012/10/21 23:38:56 asdrury Exp $",
)
parser.add_option(
    "-a", "--afname",
    dest="afname",
    action="store",
    type="string",
    default="bg.xml",
    help="First XML file. Default=bg.xml",
)
parser.add_option(
    "-b", "--bfname",
    dest="bfname",
    action="store",
    type="string",
    default="gdd_notes.xml",
    help="Second XML file. Default=gdd_notes.xml",
)
# parse_args() reads sys.argv; positional leftovers land in `args`.
options, args = parser.parse_args()
################################################################################################################################

################################################################################################################################
# http://www.python.org/peps/pep-0263.html for details
# http://docs.python.org/howto/unicode.html
# http://www.joelonsoftware.com/articles/Unicode.html
# The rules for translating a Unicode string into a sequence of bytes are called an encoding.
#u = "é".encode("utf-8")
#u = str("é", errors="strict"); print(u)
################################################################################################################################

################################################################################################################################
# Roman numerals for 1 through 8, keyed by the decimal digit as a string.
latins = dict(zip('12345678', ('I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII')))

def latinize(n):
    """Return the Roman numeral for ``n``.

    ``n`` is converted with ``str()`` before the lookup, so both ``3`` and
    ``'3'`` work. Raises ``KeyError`` when the resulting key is not in
    ``latins`` (only '1' through '8' are present).
    """
    key = str(n)
    return latins[key]
################################################################################################################################

################################################################################################################################
def normalize(s, tag=None):
    """Collapse whitespace in ``s``, escape ampersands, optionally wrap in a tag.

    Parameters:
        s: text to clean up, or None.
        tag: optional element name. When given and the cleaned text is not
            empty, the result is wrapped as ``<tag>text</tag>`` using
            ``str(tag)``.

    Returns:
        The cleaned string: leading/trailing whitespace removed, every run of
        whitespace replaced by one space, and each ``&`` replaced by
        ``&amp;``. Returns '' when ``s`` is None or holds only whitespace
        (no tag wrapper is added in that case).
    """
    if s is None:
        return ''
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    r = re.sub(r'\s+', ' ', s.strip())
    # Plain str.replace instead of re.sub: as a replacement template,
    # '\&amp;' kept its backslash and emitted '\&amp;' rather than '&amp;'.
    r = r.replace('&', '&amp;')
    if tag is not None and '' != r:
        r = '<' + str(tag) + '>' + r + '</' + str(tag) + '>'
    return r
################################################################################################################################

################################################################################################################################
def get_text(e, tag=None):
    """Return the normalized text and tail of element ``e``.

    The element's ``text`` is used when it is not None and not blank; the
    ``tail`` is appended after a single space under the same condition. The
    combined string is passed to ``normalize`` together with ``tag``. A None
    element is treated as empty text.
    """
    if e is None:
        return normalize('', tag)

    combined = ''
    head = e.text
    if head is not None and head.strip() != '':
        combined = head
    trailing = e.tail
    if trailing is not None and trailing.strip() != '':
        combined = combined + ' ' + trailing
    return normalize(combined, tag)
################################################################################################################################

################################################################################################################################
def p(s):
    """Print ``s`` without surrounding whitespace; print nothing for None or blank text."""
    if s is None:
        return
    trimmed = s.strip()
    if trimmed == '':
        return
    print(trimmed)
################################################################################################################################

################################################################################################################################
def cat(note, s, html=None):
    """Append the normalized form of ``s`` to ``note.text`` and return ``note``.

    The existing text (treated as '' when None) and ``normalize(s)`` are
    joined with one space, and the result is stripped before being stored
    back on ``note.text``. The ``html`` parameter is accepted but not used.
    """
    existing = note.text
    if existing is None:
        existing = ''
    note.text = (existing + ' ' + normalize(s)).strip()
    return note
################################################################################################################################

################################################################################################################################
################################################################################################################################
# Merge step: for every chapter milestone of every book in the first file, gather the <note> elements of the second file
# whose liber/capitulum attributes match, put them in a new <notes> element under that milestone, and write the
# combined tree to merged.xml.
################################################################################################################################
# NOTE(review): this parser is built but never handed to etree.parse(), so remove_blank_text is not in effect and
# pretty_print below may leave the existing layout unchanged. It is left unwired on purpose because enabling it
# alters the whitespace of the output -- confirm the intent before passing parser=parser to etree.parse().
parser = etree.XMLParser(remove_blank_text=True)
################################################################################################################################
# Parse inside `with` so each input handle is closed as soon as its tree has been built.
with open(options.afname, encoding="utf-8", mode="rt") as afile:
    atree = etree.parse(afile)
aroot = atree.getroot()
################################################################################################################################
with open(options.bfname, encoding="utf-8", mode="rt") as bfile:
    btree = etree.parse(bfile)
broot = btree.getroot()
################################################################################################################################
bg_books = atree.findall("//div1[@type='Book']")
for bg_book in bg_books:
    bg_liber = bg_book.get('n')
    # Relative to the current book: selects only this book's own chapter milestones, so books that share an
    # `n` value are no longer processed more than once, and no attribute value is spliced into the expression.
    bg_chapters = bg_book.xpath("p/milestone[@unit='chapter']")
    for bg_chapter in bg_chapters:
        bg_capitulum = bg_chapter.get('n')
        # XPath variables keep quotes in attribute values from breaking the query. str() mirrors the original
        # string-built expression, where a missing `n` attribute was compared as the text 'None'.
        gdd_notes = broot.xpath(
            "//note[@liber=$liber and @capitulum=$capitulum]",
            liber=str(bg_liber),
            capitulum=str(bg_capitulum),
        )
        # A <notes> container is attached to every chapter milestone, even when no note matched.
        bg_notes = etree.Element("notes")
        for gdd_note in gdd_notes:
            gdd_note.set("source", "GDD")
            # In lxml, append() moves the element out of the second tree into the container.
            bg_notes.append(gdd_note)
        bg_chapter.append(bg_notes)
# `with` guarantees the output file is flushed and closed even if serialization or writing raises.
with open('merged.xml', 'w') as f:
    f.write(etree.tostring(atree, pretty_print=True).decode("utf-8"))