48 lines
1.3 KiB
Python
48 lines
1.3 KiB
Python
|
"""
|
||
|
Citation parsing helpers: regular expressions for citation-like tokens and a checker for documents loaded with orgparse.
|
||
|
"""
|
||
|
import logging
|
||
|
import unittest
|
||
|
import re
|
||
|
import orgparse
|
||
|
import litwiki.citation
|
||
|
|
||
|
# Raw strings are used so that ``\(``, ``\)`` and ``\.`` reach the regex engine
# unchanged instead of being treated as (invalid) string escape sequences.

# Anything that looks like a citation: a parenthesised token made of optional
# upper-case letters followed by one or more digits and/or dots, e.g. "(1)",
# "(1.2)" or "(A1.2)".
RE_CITATION_LIKE = re.compile(r"(?:\()[A-Z]*[0-9\.]+(?:\))")

# A well-formed citation: exactly three dot-separated groups of digits wrapped
# in parentheses, e.g. "(1.2.3)".
RE_CITATION = re.compile(r"(?:\()[0-9]+\.[0-9]+\.[0-9]+(?:\))")
|
||
|
|
||
|
|
||
|
def check(f):
    """
    Verify that every citation-like token in a document can be parsed.

    ``f`` is passed to ``orgparse.load``.  Starting from the returned root
    node, every node of the tree is visited and its body is scanned with
    ``RE_CITATION_LIKE``.  Each match, with its first and last character
    (the surrounding parentheses) removed, is handed to
    ``litwiki.citation.parse``.

    Raises:
        ValueError: on the first match for which ``litwiki.citation.parse``
            returns ``None``.

    When no match fails, the number of matches examined is logged at INFO
    level.
    """
    pending = [orgparse.load(f)]
    checked = 0
    while pending:
        # Depth-first walk: take the most recently added node and schedule
        # its children before looking at its own body.
        current = pending.pop()
        pending.extend(current.children)
        for candidate in RE_CITATION_LIKE.findall(current.body):
            checked += 1
            # Drop the enclosing "(" and ")" before parsing.
            parsed = litwiki.citation.parse(candidate[1:-1])
            if parsed is None:
                raise ValueError(f"Unable to parse citation {candidate}")

    logging.info("%s citations checked.", checked)
|
||
|
|
||
|
|
||
|
class TestParse(unittest.TestCase):
    """Unit tests for the citation regular expressions."""

    def test_regex_citation(self):
        """Check which strings RE_CITATION_LIKE and RE_CITATION accept."""
        # Parenthesised digit/dot tokens are citation-like; lower-case
        # letters are not.
        for text in ("(1.2)", "(1)"):
            self.assertTrue(RE_CITATION_LIKE.match(text))
        self.assertFalse(RE_CITATION_LIKE.match("(a)"))

        # A strict citation needs three numeric groups inside parentheses.
        self.assertTrue(RE_CITATION.match("(1.2.3)"))
        for text in ("(I.2.3)", "(I.2)", "1.2"):
            self.assertFalse(RE_CITATION.match(text))

        # Every citation in running text is found, parentheses included.
        found = RE_CITATION.findall("(1.2.3). blablabla (4.5.6).")
        self.assertEqual(found, ["(1.2.3)", "(4.5.6)"])
|
||
|
|
||
|
|
||
|
# When this module is executed as a script, run its unittest test cases.
if __name__ == "__main__":
    unittest.main()
|