To-The-Stars/litwiki/parse.py

48 lines
1.3 KiB
Python

"""
All parsing functions
"""
import logging
import unittest
import re
import orgparse
import litwiki.citation
# Loose pattern: a parenthesised token made of optional uppercase letters
# followed by one or more digits and/or dots, e.g. "(1)", "(1.2)", "(A12)".
# Raw strings keep the regex escapes (\(, \), \.) from being read as
# (invalid) string escapes, which Python 3.12+ reports as a SyntaxWarning.
RE_CITATION_LIKE = re.compile(r"(?:\()[A-Z]*[0-9\.]+(?:\))")

# Strict pattern: exactly three dot-separated groups of digits inside
# parentheses, e.g. "(1.2.3)".
RE_CITATION = re.compile(r"(?:\()[0-9]+\.[0-9]+\.[0-9]+(?:\))")
def check(f):
    """Validate every citation-like token found in an Org document.

    The document is loaded with ``orgparse.load(f)`` and each node of the
    resulting tree is visited. Every substring of a node's ``body`` that
    matches ``RE_CITATION_LIKE`` is stripped of its surrounding parentheses
    and handed to ``litwiki.citation.parse``.

    Args:
        f: Whatever ``orgparse.load`` accepts (presumably a path or an open
            file -- confirm against the orgparse documentation).

    Raises:
        ValueError: If ``litwiki.citation.parse`` returns ``None`` for a
            citation-like token.
    """
    pending = [orgparse.load(f)]
    checked = 0
    while pending:
        # Take the most recently added node; its children are appended so
        # they are visited on later iterations.
        current = pending.pop()
        pending.extend(list(current.children))
        for candidate in RE_CITATION_LIKE.findall(current.body):
            checked += 1
            # Drop the enclosing "(" and ")" before parsing.
            inner = candidate[1:-1]
            if litwiki.citation.parse(inner) is None:
                raise ValueError(f"Unable to parse citation {candidate}")
    # Only reached when every citation-like token parsed successfully.
    logging.info("%s citations checked.", checked)
class TestParse(unittest.TestCase):
    """Sanity checks for the module-level citation regular expressions."""

    def test_regex_citation(self):
        """Check accepted and rejected samples for both citation patterns."""
        # Loose pattern: digits (with optional dots) in parentheses match,
        # a lowercase letter alone does not.
        for accepted in ("(1.2)", "(1)"):
            self.assertTrue(RE_CITATION_LIKE.match(accepted))
        self.assertFalse(RE_CITATION_LIKE.match("(a)"))

        # Strict pattern: needs three numeric groups and the parentheses.
        self.assertTrue(RE_CITATION.match("(1.2.3)"))
        for rejected in ("(I.2.3)", "(I.2)", "1.2"):
            self.assertFalse(RE_CITATION.match(rejected))

        # findall returns each full parenthesised citation in order.
        sample = "(1.2.3). blablabla (4.5.6)."
        found = RE_CITATION.findall(sample)
        self.assertEqual(found, ["(1.2.3)", "(4.5.6)"])
# Run the unit tests defined in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()