Mirrored from GitHub

git clone https://github.com/christc4/md2html-awk.git

Jump to: README.md md2html-minified.awk


README.md

1	# werc-bin
2	
3	## md2html-minified.awk
4	
5	<sub>markdown handler</sub>
6	
7	## About
8	
9	This is a much more stripped down version of the famous 'md2html.awk' by Jesus Galan (yiyus), 2009.
10	
11	## Rationale
12	
13	Unless you are caching the generated `.html`, `awk` is already a sub-optimal choice for an `md2html` converter when tools like [SMU](https://karlb.github.io/smu/), written in C, exist.
14	
15	I don't know C and I was already learning `awk`.
16	
17	Less logic, fewer system calls, fewer CPU cycles.
18	
19	## ~~Features~~ *lack thereof*
20	
21	- No character escaping  
22	  - Manually insert HTML entities  
23	- ATX headers only (levels 1–3: `#`, `##`, `###`)
24	- Minified output
25	
26	## Some differences
27	
28	There are many differences, but this is a small example.
29	
30	In the original, this rule converted a line's leading `#` characters into the matching `<h1>`–`<h6>` tag:
31	
32	```awk
33	/^#+/ && (!newli || par=="p" || /^##/)
34	{ for(n = 0; n < 6 && sub(/^# */, ""); n++)
35	 sub(/#$/, ""); par = "h" n; }
36	```
37	
38	Less logic is needed for this:
39	
40	```awk
41	/^# / { par = "h1"; sub(/^# +/, ""); }
42	(/^## /) { par = "h2"; sub(/^## +/, ""); }
43	(/^### /) { par = "h3"; sub(/^### +/, ""); }
44	```
45	
46	## Bugs
47	
48	`%` has to be escaped via `%%`

md2html-minified.awk

1	#!/bin/awk -f
2	
3	function oprint(t) { otext = otext t; } # Append t to the global output buffer otext (written once by the END rule). The former if(nr == 0)/else had identical branches.
4	
5	function nextil(t,    t1, tag, t2, lt2, linktext, url, pt2, ntag, n) { # Convert the inline Markdown in t to HTML, recursing on the text after each marker. Names after t are locals, so recursive calls cannot clobber them; ilcode, ilcode2, stag[] and ns stay global state (reset by inline()).
6		if(!match(t, /[`<\[*_\\]|(\!\[)/)) return t # no inline marker left: the text is emitted unchanged
7		t1 = substr(t, 1, RSTART - 1) # text before the marker
8		tag = substr(t, RSTART, RLENGTH) # the marker itself
9		t2 = substr(t, RSTART + RLENGTH) # text after the marker
10		if(ilcode && tag != "`"){ return t1 tag nextil(t2);} # inside a code span every marker except a backtick is literal
11		if(tag == "`"){
12			if(sub(/^`/, "", t2)){ if(!match(t2, /``/)) return t1 nextil(t2); ilcode2 = !ilcode2; } # doubled backtick: dropped when no closing `` follows, otherwise toggles double-backtick mode
13			else if(ilcode2) return t1 tag nextil(t2); # a single backtick inside a double-backtick span is literal
14			tag = "<pre>";
15			if(ilcode){ tag = "</pre>"; }
16			ilcode = !ilcode; return t1 tag nextil(t2);
17		}
18		if(tag == "\\"){ if(match(t2, /^[\\*_{}\[\]()#+\-\.!]/)){ tag = substr(t2, 1, 1)
19		t2 = substr(t2, 2); } # backslash escape: the escaped character replaces the backslash
20		return t1 tag nextil(t2); }
21	
22		if(tag == "<"){
23			if(match(t2, /^[a-z\/][^>]*>/)){ # inline HTML tag: copied through verbatim
24				tag = tag substr(t2, RSTART, RLENGTH)
25				t2 = substr(t2, RLENGTH + 1)
26				return t1 tag nextil(t2);
27			}
28		return t1 tag nextil(t2); } # "<" that does not start a tag: keep it literally (previously fell off the end and lost the text)
29	
30	        if(tag == "["){
31			if(!match(t2, /(\[.*\])|(\(.*\))/)) return t1 tag nextil(t2); match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/)
32			linktext = substr(t2, 1, RLENGTH)
33			lt2 = substr(t2, RLENGTH + 2); if(match(lt2, /^\(/)){ match(lt2, /^[^\)]+(\([^\)]+\)[^\)]*)*/) # lt2 = text after the closing "]"; t2 is left intact for the fallback below
34			url = substr(lt2, 2, RLENGTH - 1)
35			pt2 = substr(lt2, RLENGTH + 2)
36			return t1 "<a href="url">"nextil(linktext)"</a>" nextil(pt2); }
37		return t1 tag nextil(t2); } # "[...]" not directly followed by "(url)": keep the bracket literally (previously lost the text)
38		if(match(tag, /[*_]/)){ ntag = tag; if(sub("^" tag, "", t2)){ if(stag[ns] == tag && match(t2, "^" tag)) t2 = tag t2; else ntag = tag tag; } n = length(ntag)
39		tag = (n == 2) ? "b" : "i" # doubled marker -> <b>, single marker -> <i>
40		if(match(t1, / $/) && match(t2, /^ /)) return t1 ntag nextil(t2) # marker surrounded by spaces is literal: emit the marker itself, not the tag name
41		if(stag[ns] == ntag){ tag = "/" tag; ns--; } else stag[++ns] = ntag # stag[] is the stack of open emphasis markers: a matching top closes, anything else opens
42		tag = "<" tag ">"
43		return t1 tag nextil(t2); }
44		return t1 tag nextil(t2); } # any marker not handled above (e.g. "![") is kept literally so the rest of the text is never lost
45	
46	function inline(t){ ns = 0; ilcode2 = 0; ilcode = 0; return nextil(t); } # Entry point for inline conversion: clear the emphasis-stack depth and both code-span flags, then convert t with nextil().
47	
48	function printp(tag){ if(match(text, /^[ ]*$/) == 0){ text = inline(text); oprint((tag == "") ? text : "<" tag ">" text "</" tag ">"); } text = ""; } # Flush the pending global text: unless it is empty or only spaces, convert it with inline() and append it to the output, wrapped in <tag>...</tag> when tag is non-empty; text is always cleared afterwards.
49	
50	{ # Runs for every input line: work out how many of the currently open levels this line continues.
51		for(nnl = 0; nnl < nl; nnl++) # nnl = depth kept by this line; nl = depth of the blocks currently open
52		if(match(block[nnl + 1], /[ou]l/) && !sub(/^(    |	)/, "")) # staying inside a list level costs one leading indent (4 spaces or a tab), which sub() strips from the line
53		    break; # indent missing: the line leaves this level and every deeper one
54	}
55	
56	{ newli = 0; } # clear the "this line starts a new list item" flag before the list rule below may set it
57	
58	!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +|	)/ { # Unordered list item: up to 3 leading spaces, a "*", "+" or "-" bullet, then spaces or a tab
59	    sub(/^ ? ? ?[*+-]( +|	)/, ""); # strip exactly what the pattern matched; the old regex removed only "-" followed by spaces, leaving "*"/"+" bullets in the text
60	    nnl++; # the item's content sits one level deeper than the indentation alone indicated
61	    nblock[nnl] = "ul";
62	    newli = 1; # tells the following rules that this line opens a new <li>
63	}
64	
65	newli { if(blank && nnl == nl && !par) par = "p"; blank = 0; printp(par); if(nnl == nl && block[nl] == nblock[nl]) oprint("<li>"); } # New list item: flush the pending text with printp(), then emit "<li>" when the item continues the list already open at this depth (a newly opened list gets its first "<li>" from the rule that opens it). NOTE(review): "blank" is only ever assigned 0 in the visible code, so the par = "p" branch looks unreachable — confirm.
66	
67	!(nnl == nl && nblock[nl] == block[nl]) { # Nesting depth or innermost block type differs from what is open
68	    printp(par); # flush the text collected under the old structure
69	    if(nnl > nl) b = nblock[nnl]; else b = block[nl]; # b = the block type that decides the next paragraph tag
70	    if(match(b, /[ou]l/)) par = ""; else par = "p"; # text directly inside a list is emitted bare, anything else as <p>
71	}
72	
73	nnl < nl  { for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){ oprint("</" block[nl] ">"); } } # Line is less deeply nested than the open blocks: emit closing tags innermost first, decrementing nl. NOTE(review): pblock[] is never assigned in the visible code, so once nl reaches nnl the test compares block[nl] with an empty string — confirm whether nblock[] was intended.
74	
75	nnl > nl { # Line is nested deeper than the open blocks: open the missing levels, outermost first
76	    while(nl < nnl){
77		nl++; # nl now indexes the level being opened
78		block[nl] = nblock[nl]; # record the new level as open
79		oprint("<" block[nl] ">");
80		if(match(block[nl], /[ou]l/)) oprint("<li>"); # a freshly opened list starts with its first item
81	    }
82	}
83	
84	/^# / { par = "h1"; sub(/^# +/, ""); }; (/^## /) { par = "h2"; sub(/^## +/, ""); }; (/^### /) { par = "h3"; sub(/^### +/, ""); } # ATX headers, levels 1-3: set the tag used for the pending text and strip the leading "#" marker with its spaces. The pending text is not flushed here; that happens in printp() calls elsewhere.
85	
86	/^$/ { printp(par); par = "p"; next; } # blank line: flush the pending text as one element, default the next element to <p>, and skip the accumulation rule below
87	{ text = (text ? text " " : "") $0; } # any other line: append it to the pending text, joining lines with a single space
88	END{printp(par);printf("%s", otext);} # Flush the last pending element, then write the whole document without a trailing newline. otext is passed as an argument, not as the format string, so "%" in the document prints as-is and no longer needs to be written as "%%".
89