From 9fda1352d9371226c889eb0d5a9009070971c951 Mon Sep 17 00:00:00 2001 From: Lieuwe Leene Date: Sun, 23 Apr 2023 13:14:23 +0200 Subject: [PATCH] reference scripts --- Dockerfile | 25 +++ scripts/models.py | 0 scripts/python-svg.py | 34 ++++ scripts/tex_deref.py | 449 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 508 insertions(+) create mode 100644 Dockerfile create mode 100644 scripts/models.py create mode 100644 scripts/python-svg.py create mode 100644 scripts/tex_deref.py diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e573c35 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,25 @@ + +FROM alpine + +LABEL description="Hugo static build process." +LABEL maintainer="Lieuwe Leene " + +ENV HUGO_VERSION=0.105.0 +ENV HUGO_ID=hugo_${HUGO_VERSION} +ENV BASE_URL="http://localhost/" + +RUN wget -O - https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/${HUGO_ID}_Linux-64bit.tar.gz | tar -xz -C /tmp \ + && mkdir -p /usr/local/sbin \ + && mv /tmp/hugo /usr/local/sbin/hugo \ + && rm -rf /tmp/${HUGO_ID}_linux_amd64 \ + && rm -rf /tmp/LICENSE.md \ + && rm -rf /tmp/README.md + +VOLUME /public + +RUN apk add --update git asciidoctor libc6-compat libstdc++ \ + && apk upgrade \ + && apk add --no-cache ca-certificates \ + && git clone https://github.com/lleene/hugo-site.git /src \ + && git clone https://github.com/lleene/hermit.git /src/themes/hermit \ + && /usr/local/sbin/hugo -b ${BASE_URL} -s /src -d /public --minify diff --git a/scripts/models.py b/scripts/models.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/python-svg.py b/scripts/python-svg.py new file mode 100644 index 0000000..bcb60e3 --- /dev/null +++ b/scripts/python-svg.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 + +import sys +import re +import railroad + +style = ( + "\tsvg.railroad-diagram {\n\t\tbackground-color:none;\n\t}\n" + + "\tsvg.railroad-diagram path {\n\t\tstroke-width:1.5;\n\t\tstroke:white;\n\t\tfill:rgba(0,0,0,0);\n\t}\n" + + "\tsvg.railroad-diagram text {\n\t\tfont:bold 14px monospace;\n\t\tfill: white;\n\t\ttext-anchor:middle;\n\t}\n" + + "\tsvg.railroad-diagram text.label{\n\t\ttext-anchor:start;\n\t}\n" + + "\tsvg.railroad-diagram text.comment{\n\t\tfont:italic 12px monospace;\n\t}\n" + + "\tsvg.railroad-diagram rect{\n\t\tstroke-width:1.5;\n\t\tstroke:white;\n\t\tfill:none;\n\t}\n" + + "\tsvg.railroad-diagram rect.group-box {\n\t\tstroke: gray;\n\t\tstroke-dasharray: 10 5;\n\t\tfill: none;\n\t}\n" +) + +for md_src in sys.argv[1:]: + with open(md_src, "r") as file: + md_src = file.read() + for start, end in zip( + re.finditer(r"{{< python-svg ", md_src), + re.finditer(r"{{< /python-svg ", md_src), + ): + sub_str = md_src[start.start() : end.start()].split("\n") + [ + print(elem) + for elem in re.findall( + r"([0-z]+=\"[0-z\.\/\-\_\s]+\")+", sub_str[0] + ) + ] + print(sub_str[0]) + print(sub_str[1:-1]) + +# eof diff --git a/scripts/tex_deref.py b/scripts/tex_deref.py new file mode 100644 index 0000000..c81ab89 --- /dev/null +++ b/scripts/tex_deref.py @@ -0,0 +1,449 @@ +#!/usr/bin/env python3 + +"""Command line utility for converting a tex source file into a markdown document.""" + +import sys +import re +from os import path +from pathlib import Path +from typing import List, Tuple, NamedTuple + + +class Section(NamedTuple): + """Structured Figure Item.""" + + span: Tuple[int, int] + index: int + level: int + name: str + label: str + + @property + def markdown(self) -> str: + """Markdown string for this section.""" + return "#" * self.level + f" {self.index} {self.name}\n\n" + + +class Figure(NamedTuple): + """Structured Figure Item.""" + + span: Tuple[int, int] + index: int + files: List[str] + caption: str + label: str + + @property + def markdown(self) -> str: + """Markdown string for this figure.""" + fig_str = "" + for file in self.files[:-1]: + fig_str += "{{" + f'< figure src="{file}" width="500" >' + "}}\n" + fig_str += ( + "{{" + + f'< figure src="{self.files[-1] if self.files else ""}" title="Figure {self.index}: {self.caption}" width="500" >' + + "}}\n" + ) + return fig_str + + +class Equation(NamedTuple): + """Structured Equation Item.""" + + span: Tuple[int, int] + index: int + expression: str + label: str + + @property + def markdown(self) -> str: + """Markdown string for this equation.""" + return f"$$ {self.expression} $$" + + +class Table(NamedTuple): + """Structured Table Item.""" + + span: Tuple[int, int] + index: int + caption: str + content: List[List[str]] + footer: str + label: str + + @property + def markdown(self) -> str: + """Markdown string for this table.""" + tbl_str = f"Table {self.index}: {self.caption}\n" + for index, line in enumerate(self.content): + tbl_str += "|" + "|".join(line).replace("\\\\", "") + "|" + "\n" + if index == 0: + tbl_str += ( + "|" + "|".join(["----" for elem in line]) + "|" + "\n" + ) + tbl_str += self.footer + return tbl_str + + +class Citation(NamedTuple): + """Structured Citation Item.""" + + index: int + name: str + label: str + + +class LatexFile: + def __init__(self, src_file: Path): + sys_path = path.abspath(src_file) + src_dir = path.dirname(sys_path) + src_file = path.basename(sys_path) + self.tex_src = self.flatten_input("\\input{" + src_file + "}", src_dir) + self.filter_tex(sys_path.replace(".tex", ".bbl")) + + @classmethod + def first(cls, list: List[str]) -> str: + """Fetch the first optional element else return empty string.""" + return list[0] if list else "" + + @property + def figures(self) -> List[Figure]: + """Parse TEX contents for context eces.""" + return [ + Figure( + span=(begin.start(), stop.end()), + index=index + 1, + files=[ + elem[1] + for elem in re.findall( + "\\\\includegraphics(.*)\{(.*)\}", + self.tex_src[begin.start() : stop.end()], + ) + ], + caption=self.first( + re.findall( + "\\\\caption\{(.*)\}", + self.tex_src[begin.start() : stop.end()], + ) + ), + label=self.first( + re.findall( + "\\\\label\{(.*)\}", + self.tex_src[begin.start() : stop.end()], + ) + ), + ) + for index, (begin, stop) in enumerate( + zip( + re.finditer("\\\\begin\{figure\*?\}", self.tex_src), + re.finditer("\\\\end\{figure\*?\}", self.tex_src), + ) + ) + ] + + @property + def sections(self) -> List[Section]: + """Parse TEX contents for context refereces.""" + sec_list = [] + for index, match in enumerate( + re.finditer(r"\\(sub)*(section|chapter)(.*)", self.tex_src) + ): + sub_string = self.tex_src[match.start() : match.end()] + label = self.first(re.findall("\\\\label\{(.*)\}", sub_string)) + sub_string = re.sub( + r"\\(sub)*(section|chapter)", + "", + sub_string.replace("\\label{" + label + "}", ""), + ).strip() + sec_list.append( + Section( + span=(match.start(), match.end()), + index=index + 1, + level=len(match.groups()[0] or "") // 3 + 1, + name=sub_string[1:-1], + label=label, + ) + ) + return sec_list + + @property + def equations(self) -> List[Equation]: + """Parse TEX contents for context refereces.""" + eq_list = [] + for index, (begin, stop) in enumerate( + zip( + re.finditer("\\\\begin\{equation\}", self.tex_src), + re.finditer("\\\\end\{equation\}", self.tex_src), + ) + ): + sub_string = self.tex_src[begin.end() : stop.start()] + label = self.first(re.findall("\\\\label\{(.*)\}", sub_string)) + eq_list.append( + Equation( + span=(begin.start(), stop.end()), + index=index + 1, + expression=sub_string.replace( + "\\label{" + label + "}", "" + ).replace("\n", ""), + label=label, + ) + ) + return eq_list + + @property + def tables(self) -> List[Table]: + """Parse TEX contents for context refereces.""" + tbl_list = [] + for index, (begin, stop) in enumerate( + zip( + re.finditer("\\\\begin\{table\*?\}", self.tex_src), + re.finditer("\\\\end\{table\*?\}", self.tex_src), + ) + ): + sub_string = self.tex_src[begin.end() : stop.start()] + label = self.first(re.findall("\\\\label\{(.*)\}", sub_string)) + caption = self.first(re.findall("\\\\caption\{(.*)\}", sub_string)) + footer = sub_string[ + re.search("\\\\end\{tabular\}", sub_string).end() : + ].replace("\n", "") + sub_string = sub_string[ + re.search("\\\\begin\{tabular\}", sub_string) + .end() : re.search("\\\\end\{tabular\}", sub_string) + .start() + ] + content = [line.split("&") for line in sub_string.split("\n")[1:]] + [ + content.pop(row - 1) + for row in range(len(content), 0, -1) + if len(content[row - 1]) <= 1 + ] + tbl_list.append( + Table( + span=(begin.start(), stop.end()), + index=index + 1, + caption=caption, + content=content, + footer=footer, + label=label, + ) + ) + return tbl_list + + def replace_figures(self) -> None: + """Dereference and replace all figures with markdown formatting.""" + fig_list = self.figures + fig_list.reverse() + for figure in fig_list: + self.tex_src = ( + self.tex_src[: figure.span[0]] + + figure.markdown + + self.tex_src[figure.span[1] :] + ) + for figure in fig_list: + self.tex_src = re.sub( + "\\\\ref\{" + figure.label + "\}", + str(figure.index), + self.tex_src, + ) + + def replace_tables(self) -> None: + """Dereference and replace all tables with markdown formatting.""" + tbl_list = self.tables + tbl_list.reverse() + for table in tbl_list: + self.tex_src = ( + self.tex_src[: table.span[0]] + + table.markdown + + self.tex_src[table.span[1] :] + ) + for table in tbl_list: + self.tex_src = re.sub( + "\\\\ref\{" + table.label + "\}", + str(table.index), + self.tex_src, + ) + + def replace_equations(self) -> None: + """Dereference and replace all equations with markdown formatting.""" + eq_list = self.equations + eq_list.reverse() + for equation in eq_list: + self.tex_src = ( + self.tex_src[: equation.span[0]] + + equation.markdown + + self.tex_src[equation.span[1] :] + ) + for equation in eq_list: + self.tex_src = re.sub( + "\\\\ref\{" + equation.label + "\}", + str(equation.index), + self.tex_src, + ) + + @classmethod + def parse_bbl(cls, lines: List[str]) -> List[Citation]: + """Parse BBL contents for bibtec refereces.""" + return [ + Citation( + label=re.match("\\\\bibitem{([0-z\.\-_]*)}", entry).groups()[ + 0 + ], + index=index + 1, + name=re.sub( + "\\\\emph|\\\\BIBentry[A-z]*wordspacing|\\\\bibitem\{([0-z]*)\}|[\{\}\n~]|\\\\url", + "", + entry, + ), + ) + for index, entry in enumerate("".join(lines).split("\n\n")[1:-1]) + ] + + @classmethod + def md_bbl(cls, src_bbl: List[Citation]) -> str: + bbl_str = "# References:\n\n" + src_bbl.sort(key=lambda x: x.index) + for citation in src_bbl: + bbl_str += f"[^{citation.index}]: {citation.name}\n" + return bbl_str + + def replace_sections(self) -> None: + """Dereference and replace all sections with markdown formatting.""" + sc_list = self.sections + sc_list.reverse() + for section in sc_list: + self.tex_src = ( + self.tex_src[: section.span[0]] + + section.markdown + + self.tex_src[section.span[1] :] + ) + for sc in sc_list: + self.tex_src = re.sub( + "\\\\ref\{" + sc.label + "\}", str(sc.index), self.tex_src + ) + + def preprocess(self) -> None: + """Prep proceedure for customized formatting.""" + custom_rules = [ + (r"\n(\w)*%.*", ""), + (r"}(\n)*\\label", r"}\\label"), + (r" %.*", ""), + (r"\\usection", r"\\section"), + (r"\\usubsection", r"\\subsection"), + (r"\\begin{abstract}", r"\\section{Abstract}"), + (r"\\end{abstract}", r""), + (r"\\maketitle", r""), + (r"\\IEEEpeerreviewmaketitle", r""), + (r"\\bstctlcite{[0-z:]*}", r""), + (r"\\clearpage", r""), + (r"\\pagebreak", r""), + (r"\\\\[ ]*", r""), + (r"\$\\,\$", r" "), + (r"\$([0-z\\\.\-\+_,{}%\(\)\/]*)\$", r"\\\\(\g<1>\\\\)"), + (r"\\flushleft", r""), + (r"\\begin{flushleft}", r""), + (r"\\end{flushleft}", r""), + (r"\\begin{center}", r""), + (r"\\end{center}", r""), + ] + for rule, result in custom_rules: + self.tex_src = re.sub(rule, result, self.tex_src) + + def postprocess(self) -> None: + """Clean up proceedure for customized formatting.""" + custom_rules = [ + (r"\\,", r" "), + (r"~", r" "), + (r"\\tsqrd", r"²"), + (r"\\rpi", r"π"), + (r"\\rmu", r"μ"), + (r"\\rbeta", r"β"), + (r"\\ralpha", r"α"), + (r"\\rDelta", r"Δ"), + (r"\\rdelta", r"δ"), + (r"\\rsigma", r"σ"), + (r"\\rSigma", r"Σ"), + (r"\\rtau", r"τ"), + (r"\\reta", r"η"), + (r"\\rphi", r"φ"), + (r"\\rPhi", r"Φ"), + (r"\\romeg", r"ω"), + (r"\\rOmeg", r"Ω"), + (r"\\vspace\{[0-z \.\-\+]*\}", r""), + (r"\\hspace\{[0-z \.\-\+]*\}", r""), + (r"\\vfill", r""), + (r"\\hfill", r""), + (r" \\& ", r" & "), + (r"\$\\pm\$", "±"), + (r"\\tss\{([0-z,_\. ]*)\}", r"\g<1>"), + (r"\\tps\{([0-z,_\. ]*)\}", r"\g<1>"), + (r"\\textbf\{([0-z\.,_ ]*)\}", r"**\g<1>**"), + (r"\\sqrt", r"√"), + (r"\\%", "%"), + ] + for rule, result in custom_rules: + self.tex_src = re.sub(rule, result, self.tex_src) + + @classmethod + def flatten_input(cls, tex_src: str, basedir: Path = Path(".")) -> str: + """Recusive method for generating a flattened latex source.""" + for source_file in re.findall("input\{([0-z\/\.\_\-]*)\}", tex_src): + tex_input = list() + with open( + path.join(basedir, source_file), "r", encoding="utf8" + ) as input: + for line in input.readlines(): + tex_input.append( + line + if not re.findall("input\{([0-z\/\.\_\-]*)\}", line) + else cls.flatten_input(line, basedir=basedir) + ) + tex_src = tex_src.replace( + "\\input{" + source_file + "}", "".join(tex_input) + ) + return tex_src + + def replace_references(self, bbl_file: Path) -> None: + with open(bbl_file, "r") as file: + bbl_list = self.parse_bbl(file.readlines()) + bbl_list.sort(key=lambda x: x.label) + bbl_list.reverse() + for bbl in bbl_list: + self.tex_src = re.sub( + "\\\\cite{([0-z,\.\-_]*)(" + + bbl.label + + ")([0-z,\.\-_]*)}", + f"[^{bbl.index}]" + "\\\\cite{\g<1>\g<3>}", + self.tex_src, + ) + self.tex_src = re.sub( + r"\\bibliographystyle\{[0-z,\-_\/\.]*\}", "", self.tex_src + ) + self.tex_src = re.sub( + r"\\bibliography\{[0-z,\-_\.\/]*\}", + self.md_bbl(bbl_list), + self.tex_src, + ) + self.tex_src = re.sub(r"\\cite{[,]*}", "", self.tex_src) + + def strip_tex(self) -> None: + """Clear default TEX preable - postamble.""" + begin = re.search(r"\\begin\{document\}", self.tex_src).end() + end = re.search(r"\\end\{document\}", self.tex_src).start() + self.tex_src = self.tex_src[begin:end] + + def filter_tex(self, bbl_file: Path) -> None: + """Default TEX filterting proceedure.""" + self.strip_tex() + self.preprocess() + self.replace_references(bbl_file) + self.replace_figures() + self.replace_tables() + self.replace_equations() + self.replace_sections() + self.postprocess() + + +for file in sys.argv[1:]: + print(LatexFile(file).tex_src) + +# eof