reference scripts

This commit is contained in:
Lieuwe Leene 2023-04-23 13:14:23 +02:00
parent d6c74cfc41
commit 9fda1352d9
4 changed files with 508 additions and 0 deletions

25
Dockerfile Normal file
View File

@ -0,0 +1,25 @@
FROM alpine
LABEL description="Hugo static build process."
LABEL maintainer="Lieuwe Leene <lieuwe@leene.dev>"
ENV HUGO_VERSION=0.105.0
ENV HUGO_ID=hugo_${HUGO_VERSION}
ENV BASE_URL="http://localhost/"
RUN wget -O - https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/${HUGO_ID}_Linux-64bit.tar.gz | tar -xz -C /tmp \
&& mkdir -p /usr/local/sbin \
&& mv /tmp/hugo /usr/local/sbin/hugo \
&& rm -rf /tmp/${HUGO_ID}_linux_amd64 \
&& rm -rf /tmp/LICENSE.md \
&& rm -rf /tmp/README.md
VOLUME /public
RUN apk add --update git asciidoctor libc6-compat libstdc++ \
&& apk upgrade \
&& apk add --no-cache ca-certificates \
&& git clone https://github.com/lleene/hugo-site.git /src \
&& git clone https://github.com/lleene/hermit.git /src/themes/hermit \
&& /usr/local/sbin/hugo -b ${BASE_URL} -s /src -d /public --minify

0
scripts/models.py Normal file
View File

34
scripts/python-svg.py Normal file
View File

@ -0,0 +1,34 @@
#!/usr/bin/env python3
import sys
import re
import railroad
style = (
"\tsvg.railroad-diagram {\n\t\tbackground-color:none;\n\t}\n"
+ "\tsvg.railroad-diagram path {\n\t\tstroke-width:1.5;\n\t\tstroke:white;\n\t\tfill:rgba(0,0,0,0);\n\t}\n"
+ "\tsvg.railroad-diagram text {\n\t\tfont:bold 14px monospace;\n\t\tfill: white;\n\t\ttext-anchor:middle;\n\t}\n"
+ "\tsvg.railroad-diagram text.label{\n\t\ttext-anchor:start;\n\t}\n"
+ "\tsvg.railroad-diagram text.comment{\n\t\tfont:italic 12px monospace;\n\t}\n"
+ "\tsvg.railroad-diagram rect{\n\t\tstroke-width:1.5;\n\t\tstroke:white;\n\t\tfill:none;\n\t}\n"
+ "\tsvg.railroad-diagram rect.group-box {\n\t\tstroke: gray;\n\t\tstroke-dasharray: 10 5;\n\t\tfill: none;\n\t}\n"
)
for md_src in sys.argv[1:]:
with open(md_src, "r") as file:
md_src = file.read()
for start, end in zip(
re.finditer(r"{{< python-svg ", md_src),
re.finditer(r"{{< /python-svg ", md_src),
):
sub_str = md_src[start.start() : end.start()].split("\n")
[
print(elem)
for elem in re.findall(
r"([0-z]+=\"[0-z\.\/\-\_\s]+\")+", sub_str[0]
)
]
print(sub_str[0])
print(sub_str[1:-1])
# eof

449
scripts/tex_deref.py Normal file
View File

@ -0,0 +1,449 @@
#!/usr/bin/env python3
"""Command line utility for converting a tex source file into a markdown document."""
import sys
import re
from os import path
from pathlib import Path
from typing import List, Tuple, NamedTuple
class Section(NamedTuple):
"""Structured Figure Item."""
span: Tuple[int, int]
index: int
level: int
name: str
label: str
@property
def markdown(self) -> str:
"""Markdown string for this section."""
return "#" * self.level + f" {self.index} {self.name}\n\n"
class Figure(NamedTuple):
"""Structured Figure Item."""
span: Tuple[int, int]
index: int
files: List[str]
caption: str
label: str
@property
def markdown(self) -> str:
"""Markdown string for this figure."""
fig_str = ""
for file in self.files[:-1]:
fig_str += "{{" + f'< figure src="{file}" width="500" >' + "}}\n"
fig_str += (
"{{"
+ f'< figure src="{self.files[-1] if self.files else ""}" title="Figure {self.index}: {self.caption}" width="500" >'
+ "}}\n"
)
return fig_str
class Equation(NamedTuple):
"""Structured Equation Item."""
span: Tuple[int, int]
index: int
expression: str
label: str
@property
def markdown(self) -> str:
"""Markdown string for this equation."""
return f"$$ {self.expression} $$"
class Table(NamedTuple):
"""Structured Table Item."""
span: Tuple[int, int]
index: int
caption: str
content: List[List[str]]
footer: str
label: str
@property
def markdown(self) -> str:
"""Markdown string for this table."""
tbl_str = f"Table {self.index}: {self.caption}\n"
for index, line in enumerate(self.content):
tbl_str += "|" + "|".join(line).replace("\\\\", "") + "|" + "\n"
if index == 0:
tbl_str += (
"|" + "|".join(["----" for elem in line]) + "|" + "\n"
)
tbl_str += self.footer
return tbl_str
class Citation(NamedTuple):
"""Structured Citation Item."""
index: int
name: str
label: str
class LatexFile:
def __init__(self, src_file: Path):
sys_path = path.abspath(src_file)
src_dir = path.dirname(sys_path)
src_file = path.basename(sys_path)
self.tex_src = self.flatten_input("\\input{" + src_file + "}", src_dir)
self.filter_tex(sys_path.replace(".tex", ".bbl"))
@classmethod
def first(cls, list: List[str]) -> str:
"""Fetch the first optional element else return empty string."""
return list[0] if list else ""
@property
def figures(self) -> List[Figure]:
"""Parse TEX contents for context eces."""
return [
Figure(
span=(begin.start(), stop.end()),
index=index + 1,
files=[
elem[1]
for elem in re.findall(
"\\\\includegraphics(.*)\{(.*)\}",
self.tex_src[begin.start() : stop.end()],
)
],
caption=self.first(
re.findall(
"\\\\caption\{(.*)\}",
self.tex_src[begin.start() : stop.end()],
)
),
label=self.first(
re.findall(
"\\\\label\{(.*)\}",
self.tex_src[begin.start() : stop.end()],
)
),
)
for index, (begin, stop) in enumerate(
zip(
re.finditer("\\\\begin\{figure\*?\}", self.tex_src),
re.finditer("\\\\end\{figure\*?\}", self.tex_src),
)
)
]
@property
def sections(self) -> List[Section]:
"""Parse TEX contents for context refereces."""
sec_list = []
for index, match in enumerate(
re.finditer(r"\\(sub)*(section|chapter)(.*)", self.tex_src)
):
sub_string = self.tex_src[match.start() : match.end()]
label = self.first(re.findall("\\\\label\{(.*)\}", sub_string))
sub_string = re.sub(
r"\\(sub)*(section|chapter)",
"",
sub_string.replace("\\label{" + label + "}", ""),
).strip()
sec_list.append(
Section(
span=(match.start(), match.end()),
index=index + 1,
level=len(match.groups()[0] or "") // 3 + 1,
name=sub_string[1:-1],
label=label,
)
)
return sec_list
@property
def equations(self) -> List[Equation]:
"""Parse TEX contents for context refereces."""
eq_list = []
for index, (begin, stop) in enumerate(
zip(
re.finditer("\\\\begin\{equation\}", self.tex_src),
re.finditer("\\\\end\{equation\}", self.tex_src),
)
):
sub_string = self.tex_src[begin.end() : stop.start()]
label = self.first(re.findall("\\\\label\{(.*)\}", sub_string))
eq_list.append(
Equation(
span=(begin.start(), stop.end()),
index=index + 1,
expression=sub_string.replace(
"\\label{" + label + "}", ""
).replace("\n", ""),
label=label,
)
)
return eq_list
@property
def tables(self) -> List[Table]:
"""Parse TEX contents for context refereces."""
tbl_list = []
for index, (begin, stop) in enumerate(
zip(
re.finditer("\\\\begin\{table\*?\}", self.tex_src),
re.finditer("\\\\end\{table\*?\}", self.tex_src),
)
):
sub_string = self.tex_src[begin.end() : stop.start()]
label = self.first(re.findall("\\\\label\{(.*)\}", sub_string))
caption = self.first(re.findall("\\\\caption\{(.*)\}", sub_string))
footer = sub_string[
re.search("\\\\end\{tabular\}", sub_string).end() :
].replace("\n", "")
sub_string = sub_string[
re.search("\\\\begin\{tabular\}", sub_string)
.end() : re.search("\\\\end\{tabular\}", sub_string)
.start()
]
content = [line.split("&") for line in sub_string.split("\n")[1:]]
[
content.pop(row - 1)
for row in range(len(content), 0, -1)
if len(content[row - 1]) <= 1
]
tbl_list.append(
Table(
span=(begin.start(), stop.end()),
index=index + 1,
caption=caption,
content=content,
footer=footer,
label=label,
)
)
return tbl_list
def replace_figures(self) -> None:
"""Dereference and replace all figures with markdown formatting."""
fig_list = self.figures
fig_list.reverse()
for figure in fig_list:
self.tex_src = (
self.tex_src[: figure.span[0]]
+ figure.markdown
+ self.tex_src[figure.span[1] :]
)
for figure in fig_list:
self.tex_src = re.sub(
"\\\\ref\{" + figure.label + "\}",
str(figure.index),
self.tex_src,
)
def replace_tables(self) -> None:
"""Dereference and replace all tables with markdown formatting."""
tbl_list = self.tables
tbl_list.reverse()
for table in tbl_list:
self.tex_src = (
self.tex_src[: table.span[0]]
+ table.markdown
+ self.tex_src[table.span[1] :]
)
for table in tbl_list:
self.tex_src = re.sub(
"\\\\ref\{" + table.label + "\}",
str(table.index),
self.tex_src,
)
def replace_equations(self) -> None:
"""Dereference and replace all equations with markdown formatting."""
eq_list = self.equations
eq_list.reverse()
for equation in eq_list:
self.tex_src = (
self.tex_src[: equation.span[0]]
+ equation.markdown
+ self.tex_src[equation.span[1] :]
)
for equation in eq_list:
self.tex_src = re.sub(
"\\\\ref\{" + equation.label + "\}",
str(equation.index),
self.tex_src,
)
@classmethod
def parse_bbl(cls, lines: List[str]) -> List[Citation]:
"""Parse BBL contents for bibtec refereces."""
return [
Citation(
label=re.match("\\\\bibitem{([0-z\.\-_]*)}", entry).groups()[
0
],
index=index + 1,
name=re.sub(
"\\\\emph|\\\\BIBentry[A-z]*wordspacing|\\\\bibitem\{([0-z]*)\}|[\{\}\n~]|\\\\url",
"",
entry,
),
)
for index, entry in enumerate("".join(lines).split("\n\n")[1:-1])
]
@classmethod
def md_bbl(cls, src_bbl: List[Citation]) -> str:
bbl_str = "# References:\n\n"
src_bbl.sort(key=lambda x: x.index)
for citation in src_bbl:
bbl_str += f"[^{citation.index}]: {citation.name}\n"
return bbl_str
def replace_sections(self) -> None:
"""Dereference and replace all sections with markdown formatting."""
sc_list = self.sections
sc_list.reverse()
for section in sc_list:
self.tex_src = (
self.tex_src[: section.span[0]]
+ section.markdown
+ self.tex_src[section.span[1] :]
)
for sc in sc_list:
self.tex_src = re.sub(
"\\\\ref\{" + sc.label + "\}", str(sc.index), self.tex_src
)
def preprocess(self) -> None:
"""Prep proceedure for customized formatting."""
custom_rules = [
(r"\n(\w)*%.*", ""),
(r"}(\n)*\\label", r"}\\label"),
(r" %.*", ""),
(r"\\usection", r"\\section"),
(r"\\usubsection", r"\\subsection"),
(r"\\begin{abstract}", r"\\section{Abstract}"),
(r"\\end{abstract}", r""),
(r"\\maketitle", r""),
(r"\\IEEEpeerreviewmaketitle", r""),
(r"\\bstctlcite{[0-z:]*}", r""),
(r"\\clearpage", r""),
(r"\\pagebreak", r""),
(r"\\\\[ ]*", r""),
(r"\$\\,\$", r" "),
(r"\$([0-z\\\.\-\+_,{}%\(\)\/]*)\$", r"\\\\(\g<1>\\\\)"),
(r"\\flushleft", r""),
(r"\\begin{flushleft}", r""),
(r"\\end{flushleft}", r""),
(r"\\begin{center}", r""),
(r"\\end{center}", r""),
]
for rule, result in custom_rules:
self.tex_src = re.sub(rule, result, self.tex_src)
def postprocess(self) -> None:
"""Clean up proceedure for customized formatting."""
custom_rules = [
(r"\\,", r" "),
(r"~", r" "),
(r"\\tsqrd", r"²"),
(r"\\rpi", r"π"),
(r"\\rmu", r"μ"),
(r"\\rbeta", r"β"),
(r"\\ralpha", r"α"),
(r"\\rDelta", r"Δ"),
(r"\\rdelta", r"δ"),
(r"\\rsigma", r"σ"),
(r"\\rSigma", r"Σ"),
(r"\\rtau", r"τ"),
(r"\\reta", r"η"),
(r"\\rphi", r"φ"),
(r"\\rPhi", r"Φ"),
(r"\\romeg", r"ω"),
(r"\\rOmeg", r"Ω"),
(r"\\vspace\{[0-z \.\-\+]*\}", r""),
(r"\\hspace\{[0-z \.\-\+]*\}", r""),
(r"\\vfill", r""),
(r"\\hfill", r""),
(r" \\& ", r" & "),
(r"\$\\pm\$", "±"),
(r"\\tss\{([0-z,_\. ]*)\}", r"<sub>\g<1><sub>"),
(r"\\tps\{([0-z,_\. ]*)\}", r"<sup>\g<1><sup>"),
(r"\\textbf\{([0-z\.,_ ]*)\}", r"**\g<1>**"),
(r"\\sqrt", r""),
(r"\\%", "%"),
]
for rule, result in custom_rules:
self.tex_src = re.sub(rule, result, self.tex_src)
@classmethod
def flatten_input(cls, tex_src: str, basedir: Path = Path(".")) -> str:
"""Recusive method for generating a flattened latex source."""
for source_file in re.findall("input\{([0-z\/\.\_\-]*)\}", tex_src):
tex_input = list()
with open(
path.join(basedir, source_file), "r", encoding="utf8"
) as input:
for line in input.readlines():
tex_input.append(
line
if not re.findall("input\{([0-z\/\.\_\-]*)\}", line)
else cls.flatten_input(line, basedir=basedir)
)
tex_src = tex_src.replace(
"\\input{" + source_file + "}", "".join(tex_input)
)
return tex_src
def replace_references(self, bbl_file: Path) -> None:
with open(bbl_file, "r") as file:
bbl_list = self.parse_bbl(file.readlines())
bbl_list.sort(key=lambda x: x.label)
bbl_list.reverse()
for bbl in bbl_list:
self.tex_src = re.sub(
"\\\\cite{([0-z,\.\-_]*)("
+ bbl.label
+ ")([0-z,\.\-_]*)}",
f"[^{bbl.index}]" + "\\\\cite{\g<1>\g<3>}",
self.tex_src,
)
self.tex_src = re.sub(
r"\\bibliographystyle\{[0-z,\-_\/\.]*\}", "", self.tex_src
)
self.tex_src = re.sub(
r"\\bibliography\{[0-z,\-_\.\/]*\}",
self.md_bbl(bbl_list),
self.tex_src,
)
self.tex_src = re.sub(r"\\cite{[,]*}", "", self.tex_src)
def strip_tex(self) -> None:
"""Clear default TEX preable - postamble."""
begin = re.search(r"\\begin\{document\}", self.tex_src).end()
end = re.search(r"\\end\{document\}", self.tex_src).start()
self.tex_src = self.tex_src[begin:end]
def filter_tex(self, bbl_file: Path) -> None:
"""Default TEX filterting proceedure."""
self.strip_tex()
self.preprocess()
self.replace_references(bbl_file)
self.replace_figures()
self.replace_tables()
self.replace_equations()
self.replace_sections()
self.postprocess()
for file in sys.argv[1:]:
print(LatexFile(file).tex_src)
# eof