Mirrored from GitHub

git clone https://github.com/christc4/md2html-awk.git

Jump to: README.md md2html-minified.awk


README.md

1# werc-bin
2
3## md2html-minified.awk
4
5<sub>markdown handler</sub>
6
7## About
8
9This is a much more stripped-down version of the well-known 'md2html.awk' by Jesus Galan (yiyus), 2009.
10
11## Rationale
12
13Unless you are caching the generated `.html`, `awk` is already a suboptimal choice for an `md2html` converter when tools like [SMU](https://karlb.github.io/smu/), written in C, exist.
14
15I don't know C and I was already learning `awk`.
16
17Less logic, fewer system calls, fewer CPU cycles.
18
19## ~~Features~~ *lack thereof*
20
21- No character escaping  
22  - Manually insert HTML entities  
23- ATX headers only
24- Minified output
25
26## Some differences
27
28There are many differences, but this is a small example.
29
30In the original, this rule converted a run of leading `#` characters into the matching `<h1>` to `<h6>` tag:
31
32```awk
33/^#+/ && (!newli || par=="p" || /^##/)
34{ for(n = 0; n < 6 && sub(/^# */, ""); n++)
35 sub(/#$/, ""); par = "h" n; }
36```
37
38Less logic is needed for this:
39
40```awk
41/^# / { par = "h1"; sub(/^# +/, ""); }
42(/^## /) { par = "h2"; sub(/^## +/, ""); }
43(/^### /) { par = "h3"; sub(/^### +/, ""); }
44```
45
46## Bugs
47
48`%` has to be escaped via `%%`

md2html-minified.awk

1#!/bin/awk -f
2
# oprint(t): append t to the global output buffer otext.
# The END rule writes otext out in one go.
# (The earlier version branched on an unset variable "nr", but both branches
# performed this same append, so the test was dead weight.)
function oprint(t) { otext = otext t }
4
# nextil(t): translate the inline Markdown in t to HTML and return the result.
#
# Finds the first special token in t (backtick, "<", "[", "*", "_", backslash
# or "!["), copies the text before it unchanged, handles the token, and
# recurses on the remainder.
#
# Global state, reset by inline() before each paragraph:
#   ilcode   - non-zero while inside a code span
#   ilcode2  - non-zero while that span was opened with a double backtick
#   stag, ns - stack of open emphasis markers and its depth
#
# The names after t in the parameter list are locals (the usual awk idiom);
# callers pass only t.  Keeping the scratch variables local means a recursive
# call can no longer overwrite values the caller still needs.
function nextil(t,    t1, tag, t2, linktext, rest, url, pt2, ntag, n) {
	if (!match(t, /[`<\[*_\\]|(\!\[)/))
		return t
	t1 = substr(t, 1, RSTART - 1)
	tag = substr(t, RSTART, RLENGTH)
	t2 = substr(t, RSTART + RLENGTH)

	# Inside a code span only a backtick is special.
	if (ilcode && tag != "`")
		return t1 tag nextil(t2)

	# Code spans: `code` and ``code``.
	if (tag == "`") {
		if (sub(/^`/, "", t2)) {
			# An opening `` with no partner later in the text is dropped.
			# While a ``...`` span is open the next `` always closes it;
			# without the ilcode2 test the closing marker was dropped too
			# and the span was never closed.
			if (!ilcode2 && !match(t2, /``/))
				return t1 nextil(t2)
			ilcode2 = !ilcode2
		} else if (ilcode2)
			return t1 tag nextil(t2)	# single backtick inside ``...`` is literal
		tag = ilcode ? "</pre>" : "<pre>"
		ilcode = !ilcode
		return t1 tag nextil(t2)
	}

	# Backslash escape: emit the escaped punctuation character itself.
	if (tag == "\\") {
		if (match(t2, /^[\\*_{}\[\]()#+\-\.!]/)) {
			tag = substr(t2, 1, 1)
			t2 = substr(t2, 2)
		}
		return t1 tag nextil(t2)
	}

	# Raw HTML tags pass through untouched.
	if (tag == "<" && match(t2, /^[a-z\/][^>]*>/)) {
		tag = tag substr(t2, RSTART, RLENGTH)
		t2 = substr(t2, RLENGTH + 1)
		return t1 tag nextil(t2)
	}

	# Inline links: [text](url).
	if (tag == "[") {
		if (!match(t2, /(\[.*\])|(\(.*\))/))
			return t1 tag nextil(t2)
		match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/)
		linktext = substr(t2, 1, RLENGTH)
		# t2 itself is left intact so the literal fallback at the bottom can
		# still use it when no "(url)" part follows.
		rest = substr(t2, RLENGTH + 2)
		if (match(rest, /^\(/)) {
			match(rest, /^[^\)]+(\([^\)]+\)[^\)]*)*/)
			url = substr(rest, 2, RLENGTH - 1)
			pt2 = substr(rest, RLENGTH + 2)
			return t1 "<a href=" url ">" nextil(linktext) "</a>" nextil(pt2)
		}
	}

	# Emphasis: a single marker gives <i>, a doubled marker gives <b>.
	if (tag == "*" || tag == "_") {
		ntag = tag
		# Plain string comparison instead of the dynamic regex "^" tag, which
		# becomes "^*" for asterisks (undefined in POSIX EREs).
		if (substr(t2, 1, 1) == tag) {
			t2 = substr(t2, 2)
			# If the innermost open marker is the single form and yet another
			# marker follows, this one closes the single emphasis: put the
			# second marker back.  Otherwise the pair is a bold marker.
			if (stag[ns] == tag && substr(t2, 1, 1) == tag)
				t2 = tag t2
			else
				ntag = tag tag
		}
		n = length(ntag)
		# A marker with a space on both sides is not emphasis.  Emit the
		# marker itself; previously the bare tag name ("i"/"b") was emitted.
		if (match(t1, / $/) && match(t2, /^ /))
			return t1 ntag nextil(t2)
		tag = (n == 2) ? "b" : "i"
		if (stag[ns] == ntag) {
			tag = "/" tag
			ns--
		} else
			stag[++ns] = ntag
		return t1 "<" tag ">" nextil(t2)
	}

	# Anything not handled above ("![", a "<" that does not start a tag, a "["
	# that is not an inline link) is emitted literally.  Without this the
	# function fell off the end and returned an empty string, losing the
	# surrounding text.
	return t1 tag nextil(t2)
}
45
# inline(t): render the inline Markdown of one paragraph.
# Clears the code-span flags and the emphasis stack depth that nextil() keeps
# in globals, then returns nextil(t).
function inline(t) {
	ns = 0
	ilcode2 = 0
	ilcode = 0
	return nextil(t)
}
47
# printp(tag): flush the pending paragraph held in the global text.
# If text contains anything other than spaces it is run through inline() and
# appended to the output, wrapped in <tag>...</tag> when tag is non-empty.
# text is cleared in every case.
function printp(tag) {
	if (text !~ /^[ ]*$/) {
		text = inline(text)
		if (tag == "")
			oprint(text)
		else
			oprint("<" tag ">" text "</" tag ">")
	}
	text = ""
}
49
# Per-record setup, run for every input line.
# Counts in nnl how many of the nl currently open blocks this line continues:
# a list level counts as continued only if one level of indentation (four
# spaces or a tab) can be stripped from the front of the line.  Then clears
# the "this line starts a list item" flag.
{
	nnl = 0
	while (nnl < nl) {
		if (block[nnl + 1] ~ /[ou]l/ && !sub(/^(    |	)/, ""))
			break
		nnl++
	}
	newli = 0
}
57
# Unordered list item: up to three leading spaces, a bullet ("*", "+" or "-"),
# then at least one space or a tab.  Strips the marker and records that this
# line opens an <li> one level deeper than the blocks it continues.
# NOTE(review): hr is not assigned anywhere in the visible script, so !hr is
# always true here.
!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +|\t)/ {
	# Strip exactly what the pattern matched.  The previous expression only
	# removed "-" followed by a space, so "*" and "+" bullets (and bullets
	# followed by a tab) were left in the item text.
	sub(/^ ? ? ?[*+-]( +|\t)/, "")
	nnl++
	nblock[nnl] = "ul"
	newli = 1
}
64
# A list item starts on this line: flush the pending text and, when the item
# sits at the depth and in the kind of list that is already open, emit "<li>".
# NOTE(review): blank is only ever cleared in the visible script, never set.
newli {
	if (blank && nnl == nl && !par)
		par = "p"
	blank = 0
	printp(par)
	if (nnl == nl && block[nl] == nblock[nl])
		oprint("<li>")
}

# The block nesting changes on this line: flush the pending text and choose
# the wrapper for what follows (none inside a list, <p> otherwise).
nnl != nl || nblock[nl] != block[nl] {
	printp(par)
	b = (nnl > nl) ? nblock[nnl] : block[nl]
	par = (b ~ /[ou]l/) ? "" : "p"
}

# Fewer blocks continue than are open: emit closing tags, innermost first.
# NOTE(review): pblock is not assigned in the visible script, so pblock[nl]
# is always the empty string in the comparison below.
nnl < nl {
	while (nl > nnl || (nnl == nl && pblock[nl] != block[nl])) {
		oprint("</" block[nl] ">")
		nl--
	}
}

# More blocks than are open: open each new level, adding "<li>" for lists.
nnl > nl {
	while (nl < nnl) {
		block[nl + 1] = nblock[nl + 1]
		oprint("<" block[nl + 1] ">")
		if (block[nl + 1] ~ /[ou]l/)
			oprint("<li>")
		nl++
	}
}
83
# ATX headings, levels 1 to 3: strip the leading "#" marker from the line and
# remember the heading tag in par so the text is wrapped in it when flushed.
/^# / {
	sub(/^# +/, "")
	par = "h1"
}
/^## / {
	sub(/^## +/, "")
	par = "h2"
}
/^### / {
	sub(/^### +/, "")
	par = "h3"
}
85
# Blank line: end of paragraph.  Flush it and default the next one to <p>.
/^$/ {
	printp(par)
	par = "p"
	next
}

# Any other line: append it to the pending paragraph, separated by a space.
{ text = (text ? text " " : "") $0 }

# End of input: flush the last paragraph and write the whole document.
# otext is printed through an explicit "%s" format.  Passing it as the format
# string itself turned every "%" in the document into a conversion specifier,
# which is the README's "% has to be escaped via %%" bug and could abort or
# garble output on input such as "%s" or "%d".
# Documents that relied on the "%%" workaround will now show both characters.
END {
	printp(par)
	printf("%s", otext)
}
89