#!/usr/bin/env python
# 
# Copyright 2012, Karljohan Lundin Palmerius
# 
# Usage:
# 
# from BibTeXParser import BibTeXParser
# parser = BibTeXParser()
# result = parser.parse("bibliography.bib")
# 
# The result will be a list of one associative array for each bibtex
# record, containing the fields "type" and "key" with record type and
# bibtex-key, respectively, and the fields of each record.
# 

import re

# Head of a record once the leading "@" has been removed: the record type,
# the opening "{" or "(", and the citation key.  The key may hold anything
# except whitespace, commas and braces, so keys such as "Lundin:2012" or
# "knuth-84" are captured in full rather than cut at the first punctuation.
re_head_pattern = r"""\s*(\w+)\s*[({]\s*([^\s,{}]*)\s*"""

# A "name = value" field line.  The value is captured raw (to the end of
# the line) and cleaned up afterwards with _VALUE_TRIM_CHARS.
re_var_pattern = r"""\s*(\w+)\s*=\s*(.*)"""

# An "@" only starts a record when it is followed by a record head
# ("type{" or "type(").  An "@" inside a field value, e.g. in an e-mail
# address, therefore does not cut the record in two.
_RECORD_START_PATTERN = r"""@(?=\s*\w+\s*[({])"""

# Characters removed from both ends of a raw field value: the BibTeX
# delimiters plus whitespace, so that trailing blanks after the separating
# comma do not prevent the delimiters from being removed.
_VALUE_TRIM_CHARS = '}"{, \t\r\n'


class BibTeXParser:
  """Simple line-oriented reader for BibTeX files.

  Each record is expected to have its head ("@type{key,") on one line and
  every "name = value" field on a line of its own.  Lines that do not look
  like a field (continuation lines of a multi-line value, the closing
  brace, blank lines) are ignored.
  """

  def __init__(self):
    pass

  def parse(self,filename):
    """Parse the BibTeX file *filename*.

    Returns a list with one dictionary per record.  Each dictionary holds
    the record type under "type", the citation key under "key", and one
    entry per field line with the field name as key and the value, with
    surrounding braces, quotes, commas and whitespace removed, as a string.

    NOTE: a field that is itself named "type" or "key" replaces the
    record-level entry of the same name.

    Records that fit on a single line are skipped.  Errors raised by
    open() for a missing or unreadable file are propagated to the caller.
    """
    with open(filename,"r") as fin:
      file_contents = fin.read()

    re_head = re.compile(re_head_pattern)
    re_var = re.compile(re_var_pattern)

    # The first chunk is whatever precedes the first record (comments,
    # free text, or nothing at all) and is never a record itself.
    chunks = re.split(_RECORD_START_PATTERN, file_contents)[1:]

    result = []
    for chunk in chunks:
      res_rec = self._parse_record(chunk, re_head, re_var)
      if res_rec is not None:
        result.append(res_rec)

    return result

  def _parse_record(self, record, re_head, re_var):
    """Turn the text of one record (without its "@") into a dictionary.

    Returns None when the text has fewer than two lines or its first line
    is not a record head.
    """
    lines = record.splitlines()
    if len(lines) < 2:
      return None

    head_res = re_head.match(lines[0])
    if head_res is None:
      # Not a record head; skip the chunk instead of failing on .group().
      return None

    res_rec = { "type": head_res.group(1), "key": head_res.group(2) }

    for line in lines[1:]:
      var_res = re_var.match(line)
      if var_res is None:
        continue
      res_rec[var_res.group(1)] = var_res.group(2).strip(_VALUE_TRIM_CHARS)

    return res_rec