#!/usr/bin/env python
#
# Copyright 2012, Karljohan Lundin Palmerius
#
# Usage (assuming this file is importable as the module BibTeXParser):
#
#   from BibTeXParser import BibTeXParser
#   parser = BibTeXParser()
#   result = parser.parse("bibliography.bib")
#
# The result will be a list of one associative array for each bibtex
# record, containing the fields "type" and "key" with record type and
# bibtex-key, respectively, and the fields of each record.
#
# Limitations: this is a line-oriented parser. Only the first line of a
# record is searched for the record head, only lines of the form
# "name = value" are picked up as fields, and records that fit on a
# single line are skipped.
#

import re

# Head of a record, i.e. the text following the "@": record type, an
# opening "(" or "{" and the (possibly empty) bibtex-key. The key may
# contain any character except whitespace and comma, so that keys such
# as "lundin-2012" or "DBLP:conf/x" are not truncated.
re_head_pattern = r"""\s*(\w+)\s*[({]\s*([^\s,]*)\s*"""

# A "name = value" field line. The value group captures the rest of the
# line; surrounding delimiters are stripped off afterwards.
re_var_pattern = r"""\s*(\w+)\s*=\s*(.*)\s*,?"""

# An "@" starts a new record only when it is followed by something that
# looks like a record head. This keeps an "@" inside a field value
# (e.g. an e-mail address) from splitting the record in two.
_re_record_start_pattern = r"""@(?=\s*\w+\s*[({])"""

# Characters removed from both ends of a field value: braces, quotes
# and commas, plus whitespace so that trailing blanks after the comma
# do not stop the delimiters from being stripped.
_value_strip_chars = """}"{,""" + " \t\r\n\f\v"


class BibTeXParser:
    """Minimal line-oriented reader for BibTeX files."""

    def __init__(self):
        pass

    def parse(self, filename):
        """Read the BibTeX file *filename* and return its records.

        Returns a list with one dict per record. Each dict holds the
        record type under "type", the bibtex-key under "key" and one
        entry per "name = value" line found in the record, with the
        surrounding braces, quotes, commas and whitespace removed from
        the value.

        Text that does not start with a recognisable record head (for
        instance comments before the first record) is ignored, as are
        records that fit on a single line.

        Errors from opening or reading the file are not caught.
        """
        with open(filename, "r") as fin:
            file_contents = fin.read()

        # Compiled per call so that changes to the module-level
        # patterns made by a caller are still honoured.
        re_start = re.compile(_re_record_start_pattern)
        re_head = re.compile(re_head_pattern)
        re_var = re.compile(re_var_pattern)

        result = []
        for record in re_start.split(file_contents):
            lines = record.splitlines()
            if len(lines) < 2:
                continue

            head_res = re_head.match(lines[0])
            if head_res is None:
                # Not a record: text before the first "@" or a head
                # that is not on the first line of the chunk.
                continue

            res_rec = {
                "type": head_res.group(1),
                "key": head_res.group(2),
            }

            for line in lines[1:]:
                var_res = re_var.match(line)
                if var_res is None:
                    continue
                value = var_res.group(2).strip(_value_strip_chars)
                res_rec[var_res.group(1)] = value

            result.append(res_rec)

        return result