Remove outdated toc_pages.txt, add new Go modules for IHK Chemnitz PDF rendering including diagrams, tables, and TOC functionality.
This commit is contained in:
+214
-118
@@ -2,25 +2,22 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/zlib"
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark-meta"
|
||||
meta "github.com/yuin/goldmark-meta"
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/extension"
|
||||
extast "github.com/yuin/goldmark/extension/ast"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/text"
|
||||
"gopkg.in/yaml.v3"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ParseMarkdown reads a Markdown file, extracts the YAML front matter into a
|
||||
// Config and returns the parsed AST together with the raw source bytes.
|
||||
func ParseMarkdown(mdPath string) (Config, ast.Node, []byte, error) {
|
||||
content, err := os.ReadFile(mdPath)
|
||||
if err != nil {
|
||||
@@ -31,146 +28,287 @@ func ParseMarkdown(mdPath string) (Config, ast.Node, []byte, error) {
|
||||
goldmark.WithExtensions(meta.Meta, extension.Table),
|
||||
)
|
||||
|
||||
context := parser.NewContext()
|
||||
doc := md.Parser().Parse(text.NewReader(content), parser.WithContext(context))
|
||||
ctx := parser.NewContext()
|
||||
doc := md.Parser().Parse(text.NewReader(content), parser.WithContext(ctx))
|
||||
|
||||
metaData := meta.Get(context)
|
||||
|
||||
// Convert metaData map to Config struct
|
||||
metaData := meta.Get(ctx)
|
||||
var config Config
|
||||
yamlData, _ := yaml.Marshal(metaData)
|
||||
err = yaml.Unmarshal(yamlData, &config)
|
||||
if err != nil {
|
||||
return Config{}, nil, nil, fmt.Errorf("error parsing metadata: %v", err)
|
||||
raw, _ := yaml.Marshal(metaData)
|
||||
if err = yaml.Unmarshal(raw, &config); err != nil {
|
||||
return Config{}, nil, nil, fmt.Errorf("YAML front matter: %w", err)
|
||||
}
|
||||
|
||||
return config, doc, content, nil
|
||||
}
|
||||
|
||||
// parserState tracks transient state during the AST walk.
type parserState struct {
	// nextCodeIsAppendix is set by an "@AnhangUML:" directive. The next
	// successfully rendered diagram is then registered as an appendix
	// instead of being rendered inline, and the flag is cleared again.
	nextCodeIsAppendix bool
	// appendixTitle is the title text taken from the "@AnhangUML:" directive.
	appendixTitle string
	// listStack holds one frame per currently open list; the last element
	// belongs to the innermost list.
	listStack []listFrame
}

// listFrame tracks the type and item counter for one list nesting level.
type listFrame struct {
	ordered bool // true when the list is an ordered (numbered) list
	index   int  // 1-based number of the most recently seen item; 0 before the first
}
|
||||
|
||||
// RenderAST walks the Goldmark AST and dispatches to IHKRenderer methods.
|
||||
//
|
||||
// Front-matter detection: a level-1 heading that is not a numbered section
|
||||
// and has a name in the front-matter list ("Vorwort", "Einleitung", "Abkürzungsverzeichnis")
|
||||
// stays in Roman-numeral territory. All other level-1 headings trigger
|
||||
// StartMainBody() and switch to Arabic page numbering.
|
||||
func RenderAST(doc ast.Node, content []byte, r *IHKRenderer) error {
|
||||
r.StartFrontMatter()
|
||||
state := &parserState{}
|
||||
|
||||
return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
|
||||
switch node := n.(type) {
|
||||
|
||||
// ── Headings ──────────────────────────────────────────────────────────
|
||||
case *ast.Heading:
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
title := extractPlainText(node, content)
|
||||
if node.Level == 1 && r.numType == NumRoman {
|
||||
title := extractText(node, content)
|
||||
if title != "Vorwort" && title != "Abkürzungsverzeichnis" {
|
||||
if !isFrontMatterSection(title) {
|
||||
r.StartMainBody()
|
||||
}
|
||||
}
|
||||
title := extractText(node, content)
|
||||
r.RenderHeader(node.Level, title)
|
||||
return ast.WalkSkipChildren, nil
|
||||
|
||||
// ── Paragraphs ────────────────────────────────────────────────────────
|
||||
case *ast.Paragraph:
|
||||
text := extractText(node, content)
|
||||
lines := strings.Split(text, "\n")
|
||||
isMeta := false
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "@Quelle:") {
|
||||
r.AddSource(strings.TrimPrefix(line, "@Quelle:"))
|
||||
isMeta = true
|
||||
} else if strings.HasPrefix(line, "@Anhang:") {
|
||||
r.AddAppendix(strings.TrimPrefix(line, "@Anhang:"))
|
||||
isMeta = true
|
||||
} else if strings.HasPrefix(line, "@AnhangUML:") {
|
||||
state.nextCodeIsAppendix = true
|
||||
state.appendixTitle = strings.TrimSpace(strings.TrimPrefix(line, "@AnhangUML:"))
|
||||
isMeta = true
|
||||
}
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
if isMeta {
|
||||
plain := extractPlainText(node, content)
|
||||
|
||||
// Special directives embedded in paragraphs
|
||||
if handled := handleDirectives(plain, state, r); handled {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
r.RenderParagraph(text)
|
||||
|
||||
spans := extractInlineSpans(node, content)
|
||||
r.RenderParagraphSpans(spans)
|
||||
return ast.WalkSkipChildren, nil
|
||||
|
||||
// ── Fenced code blocks ────────────────────────────────────────────────
|
||||
case *ast.FencedCodeBlock:
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
lang := string(node.Language(content))
|
||||
code := extractCode(node, content)
|
||||
code := extractCodeBlock(node, content)
|
||||
|
||||
if lang == "mermaid" || lang == "plantuml" || lang == "puml" {
|
||||
imgPath, err := RenderDiagramViaKroki(lang, code)
|
||||
if err == nil {
|
||||
caption := lang
|
||||
if state.nextCodeIsAppendix {
|
||||
r.AddAppendix(state.appendixTitle + " | " + imgPath)
|
||||
state.nextCodeIsAppendix = false
|
||||
} else {
|
||||
r.RenderImage(imgPath, "Diagramm: "+lang)
|
||||
r.RenderImage(imgPath, "Diagram ("+caption+")")
|
||||
}
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
// Fall through: render as plain code block on error
|
||||
}
|
||||
case *ast.Image:
|
||||
imgPath := string(node.Destination)
|
||||
title := string(node.Title)
|
||||
r.RenderImage(imgPath, title)
|
||||
// Render non-diagram code blocks as monospace paragraphs
|
||||
r.RenderParagraphSpans([]InlineSpan{{Text: code, Code: true}})
|
||||
return ast.WalkSkipChildren, nil
|
||||
|
||||
// ── Images ────────────────────────────────────────────────────────────
|
||||
case *ast.Image:
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
imgPath := string(node.Destination)
|
||||
caption := extractPlainText(node, content)
|
||||
if caption == "" {
|
||||
caption = string(node.Title)
|
||||
}
|
||||
r.RenderImage(imgPath, caption)
|
||||
return ast.WalkSkipChildren, nil
|
||||
|
||||
// ── Block quotes (alternative @Quelle syntax) ─────────────────────────
|
||||
case *ast.Blockquote:
|
||||
// Check if first paragraph starts with "Quelle:"
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
first := node.FirstChild()
|
||||
if first != nil {
|
||||
if para, ok := first.(*ast.Paragraph); ok {
|
||||
pText := extractText(para, content)
|
||||
pText := extractPlainText(para, content)
|
||||
if strings.HasPrefix(pText, "Quelle:") || strings.HasPrefix(pText, "Source:") {
|
||||
sourceText := strings.TrimPrefix(pText, "Quelle:")
|
||||
sourceText = strings.TrimPrefix(sourceText, "Source:")
|
||||
r.AddSource(sourceText)
|
||||
src := strings.TrimPrefix(strings.TrimPrefix(pText, "Quelle:"), "Source:")
|
||||
r.AddSource(src)
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
|
||||
// ── Lists ─────────────────────────────────────────────────────────────
|
||||
case *ast.List:
|
||||
// Items will be handled by ListItem
|
||||
if entering {
|
||||
state.listStack = append(state.listStack, listFrame{ordered: node.IsOrdered()})
|
||||
} else {
|
||||
if len(state.listStack) > 0 {
|
||||
state.listStack = state.listStack[:len(state.listStack)-1]
|
||||
}
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
|
||||
case *ast.ListItem:
|
||||
text := extractText(node, content)
|
||||
r.RenderListItem(text, true, 0) // Basic bullet point for now
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
depth := len(state.listStack) - 1
|
||||
if depth < 0 {
|
||||
depth = 0
|
||||
}
|
||||
frame := &state.listStack[depth]
|
||||
frame.index++
|
||||
spans := extractInlineSpansFromListItem(node, content)
|
||||
r.RenderListItem(spans, frame.ordered, frame.index, depth)
|
||||
return ast.WalkSkipChildren, nil
|
||||
|
||||
// ── Tables ────────────────────────────────────────────────────────────
|
||||
case *extast.Table:
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
var tableData [][]string
|
||||
for row := node.FirstChild(); row != nil; row = row.NextSibling() {
|
||||
var rowData []string
|
||||
for cell := row.FirstChild(); cell != nil; cell = cell.NextSibling() {
|
||||
rowData = append(rowData, extractText(cell, content))
|
||||
rowData = append(rowData, extractPlainText(cell, content))
|
||||
}
|
||||
tableData = append(tableData, rowData)
|
||||
}
|
||||
r.RenderTable(tableData)
|
||||
r.RenderTable(tableData, "")
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
return ast.WalkContinue, nil
|
||||
})
|
||||
}
|
||||
|
||||
func extractText(n ast.Node, content []byte) string {
|
||||
var textStr string
|
||||
for child := n.FirstChild(); child != nil; child = child.NextSibling() {
|
||||
if textNode, ok := child.(*ast.Text); ok {
|
||||
textStr += string(textNode.Segment.Value(content))
|
||||
if textNode.HardLineBreak() || textNode.SoftLineBreak() {
|
||||
textStr += "\n"
|
||||
}
|
||||
} else {
|
||||
textStr += extractText(child, content)
|
||||
}
|
||||
// isFrontMatterSection returns true for level-1 headings that belong to the
|
||||
// Roman-numbered front matter (before the main body begins).
|
||||
func isFrontMatterSection(title string) bool {
|
||||
switch strings.TrimSpace(title) {
|
||||
case "Vorwort", "Einleitung", "Abkürzungsverzeichnis":
|
||||
return true
|
||||
}
|
||||
if textStr == "" {
|
||||
// Fallback for simple nodes
|
||||
return string(n.Text(content))
|
||||
}
|
||||
return textStr
|
||||
// Everything else (including numbered sections like "1. Problem Statement")
|
||||
// belongs to the Arabic-numbered main body.
|
||||
return false
|
||||
}
|
||||
|
||||
func extractCode(n *ast.FencedCodeBlock, content []byte) string {
|
||||
// handleDirectives processes special @-prefixed control lines in a paragraph.
|
||||
// Returns true if the paragraph was fully consumed as a directive.
|
||||
func handleDirectives(text string, state *parserState, r *IHKRenderer) bool {
|
||||
lines := strings.Split(text, "\n")
|
||||
handled := false
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
switch {
|
||||
case strings.HasPrefix(line, "@Quelle:"):
|
||||
r.AddSource(strings.TrimSpace(strings.TrimPrefix(line, "@Quelle:")))
|
||||
handled = true
|
||||
case strings.HasPrefix(line, "@Anhang:"):
|
||||
r.AddAppendix(strings.TrimSpace(strings.TrimPrefix(line, "@Anhang:")))
|
||||
handled = true
|
||||
case strings.HasPrefix(line, "@AnhangUML:"):
|
||||
state.nextCodeIsAppendix = true
|
||||
state.appendixTitle = strings.TrimSpace(strings.TrimPrefix(line, "@AnhangUML:"))
|
||||
handled = true
|
||||
}
|
||||
}
|
||||
return handled
|
||||
}
|
||||
|
||||
// extractPlainText returns the plain-text content of an AST node by
|
||||
// recursively concatenating all text leaf nodes.
|
||||
func extractPlainText(n ast.Node, content []byte) string {
|
||||
var sb strings.Builder
|
||||
for child := n.FirstChild(); child != nil; child = child.NextSibling() {
|
||||
if t, ok := child.(*ast.Text); ok {
|
||||
sb.Write(t.Segment.Value(content))
|
||||
if t.SoftLineBreak() || t.HardLineBreak() {
|
||||
sb.WriteByte('\n')
|
||||
}
|
||||
} else {
|
||||
sb.WriteString(extractPlainText(child, content))
|
||||
}
|
||||
}
|
||||
if sb.Len() == 0 {
|
||||
return string(n.Text(content))
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// extractInlineSpans walks the children of a paragraph node and builds a
|
||||
// slice of InlineSpan values that preserve bold, italic, and code formatting.
|
||||
func extractInlineSpans(n ast.Node, content []byte) []InlineSpan {
|
||||
var spans []InlineSpan
|
||||
walkInline(n, content, false, false, false, &spans)
|
||||
return spans
|
||||
}
|
||||
|
||||
// extractInlineSpansFromListItem extracts spans from the first paragraph child
|
||||
// of a list item, which is how Goldmark represents list item content.
|
||||
func extractInlineSpansFromListItem(item *ast.ListItem, content []byte) []InlineSpan {
|
||||
for child := item.FirstChild(); child != nil; child = child.NextSibling() {
|
||||
if _, ok := child.(*ast.Paragraph); ok {
|
||||
return extractInlineSpans(child, content)
|
||||
}
|
||||
// TextBlock is used for tight lists
|
||||
return extractInlineSpans(child, content)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// walkInline recursively collects InlineSpan values from an AST subtree,
|
||||
// propagating bold/italic context down through Emphasis nodes.
|
||||
func walkInline(n ast.Node, content []byte, bold, italic, code bool, out *[]InlineSpan) {
|
||||
for child := n.FirstChild(); child != nil; child = child.NextSibling() {
|
||||
switch c := child.(type) {
|
||||
case *ast.Text:
|
||||
seg := string(c.Segment.Value(content))
|
||||
if c.SoftLineBreak() {
|
||||
seg += " "
|
||||
} else if c.HardLineBreak() {
|
||||
seg += "\n"
|
||||
}
|
||||
if seg != "" {
|
||||
*out = append(*out, InlineSpan{Text: seg, Bold: bold, Italic: italic, Code: code})
|
||||
}
|
||||
case *ast.Emphasis:
|
||||
childBold := bold || c.Level == 2
|
||||
childItalic := italic || c.Level == 1
|
||||
walkInline(c, content, childBold, childItalic, code, out)
|
||||
case *ast.CodeSpan:
|
||||
raw := string(c.Text(content))
|
||||
*out = append(*out, InlineSpan{Text: raw, Bold: bold, Italic: italic, Code: true})
|
||||
case *ast.Link:
|
||||
// Render link text; the href is not shown (no footnote support yet).
|
||||
walkInline(c, content, bold, italic, code, out)
|
||||
default:
|
||||
walkInline(c, content, bold, italic, code, out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// extractCodeBlock returns the raw source text of a fenced code block.
|
||||
func extractCodeBlock(n *ast.FencedCodeBlock, content []byte) string {
|
||||
var buf bytes.Buffer
|
||||
for i := 0; i < n.Lines().Len(); i++ {
|
||||
line := n.Lines().At(i)
|
||||
@@ -178,45 +316,3 @@ func extractCode(n *ast.FencedCodeBlock, content []byte) string {
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// RenderDiagramViaKroki renders diagram source code to a PNG image through
// the public Kroki service (https://kroki.io) and returns the path of the
// resulting image file.
//
// lang is the Kroki diagram type; "puml" is accepted as an alias for
// "plantuml". code is the diagram source text.
//
// Results are cached in os.TempDir() under a file name derived from the
// SHA-256 of the diagram type and source, so an unchanged diagram is fetched
// only once. The download is written to a temporary file and renamed into
// place only after it completed, so a failed or interrupted request never
// leaves a truncated image behind that later calls would treat as a cache hit.
//
// An error is returned when the source cannot be compressed, the request
// fails, Kroki answers with a status other than 200, or the cache file cannot
// be written.
//
// NOTE(review): http.Get uses http.DefaultClient, which has no timeout; a
// hanging Kroki request will block the caller indefinitely.
func RenderDiagramViaKroki(lang string, code string) (string, error) {
	if lang == "puml" {
		lang = "plantuml"
	}

	// Cache lookup first: a hit needs neither encoding nor network access.
	// The diagram type is part of the key so identical source rendered as two
	// different diagram types cannot share a cache entry.
	sum := sha256.Sum256([]byte(lang + "\x00" + code))
	cachePath := filepath.Join(os.TempDir(), fmt.Sprintf("ihk_cache_%x.png", sum))
	if _, err := os.Stat(cachePath); err == nil {
		return cachePath, nil
	}

	// Kroki encoding: zlib + base64url
	var b bytes.Buffer
	w := zlib.NewWriter(&b)
	if _, err := w.Write([]byte(code)); err != nil {
		return "", fmt.Errorf("compressing diagram source: %w", err)
	}
	if err := w.Close(); err != nil {
		return "", fmt.Errorf("compressing diagram source: %w", err)
	}
	encoded := base64.URLEncoding.EncodeToString(b.Bytes())
	endpoint := fmt.Sprintf("https://kroki.io/%s/png/%s", lang, encoded)

	resp, err := http.Get(endpoint)
	if err != nil {
		return "", fmt.Errorf("requesting diagram from kroki: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("kroki error: %d", resp.StatusCode)
	}

	// Download into a temporary file in the cache directory so the final
	// rename stays on one filesystem.
	tmp, err := os.CreateTemp(filepath.Dir(cachePath), "ihk_cache_*.tmp")
	if err != nil {
		return "", fmt.Errorf("creating diagram cache file: %w", err)
	}
	tmpPath := tmp.Name()

	if _, err := io.Copy(tmp, resp.Body); err != nil {
		// Best-effort cleanup; the copy error is the one worth reporting.
		tmp.Close()
		os.Remove(tmpPath)
		return "", fmt.Errorf("downloading diagram: %w", err)
	}
	if err := tmp.Close(); err != nil {
		os.Remove(tmpPath)
		return "", fmt.Errorf("writing diagram cache file: %w", err)
	}
	if err := os.Rename(tmpPath, cachePath); err != nil {
		os.Remove(tmpPath)
		return "", fmt.Errorf("storing diagram cache file: %w", err)
	}
	return cachePath, nil
}
|
||||
|
||||
Reference in New Issue
Block a user