Remove outdated toc_pages.txt, add new Go modules for IHK Chemnitz PDF rendering including diagrams, tables, and TOC functionality.

2026-05-04 22:06:28 +02:00
parent e98f7efa52
commit 81745b5f48
23 changed files with 1532 additions and 809 deletions
@@ -2,25 +2,22 @@ package main

 import (
 	"bytes"
-	"compress/zlib"
-	"crypto/sha256"
-	"encoding/base64"
 	"fmt"
+	"os"
+	"strings"
+
 	"github.com/yuin/goldmark"
-	"github.com/yuin/goldmark-meta"
+	meta "github.com/yuin/goldmark-meta"
 	"github.com/yuin/goldmark/ast"
 	"github.com/yuin/goldmark/extension"
 	extast "github.com/yuin/goldmark/extension/ast"
 	"github.com/yuin/goldmark/parser"
 	"github.com/yuin/goldmark/text"
 	"gopkg.in/yaml.v3"
-	"io"
-	"net/http"
-	"os"
-	"path/filepath"
-	"strings"
 )

+// ParseMarkdown reads a Markdown file, extracts the YAML front matter into a
+// Config and returns the parsed AST together with the raw source bytes.
 func ParseMarkdown(mdPath string) (Config, ast.Node, []byte, error) {
 	content, err := os.ReadFile(mdPath)
 	if err != nil {
@@ -31,146 +28,287 @@ func ParseMarkdown(mdPath string) (Config, ast.Node, []byte, error) {
 		goldmark.WithExtensions(meta.Meta, extension.Table),
 	)

-	context := parser.NewContext()
-	doc := md.Parser().Parse(text.NewReader(content), parser.WithContext(context))
+	ctx := parser.NewContext()
+	doc := md.Parser().Parse(text.NewReader(content), parser.WithContext(ctx))

-	metaData := meta.Get(context)
-
-	// Convert metaData map to Config struct
+	metaData := meta.Get(ctx)
 	var config Config
-	yamlData, _ := yaml.Marshal(metaData)
-	err = yaml.Unmarshal(yamlData, &config)
-	if err != nil {
-		return Config{}, nil, nil, fmt.Errorf("error parsing metadata: %v", err)
+	raw, _ := yaml.Marshal(metaData)
+	if err = yaml.Unmarshal(raw, &config); err != nil {
+		return Config{}, nil, nil, fmt.Errorf("YAML front matter: %w", err)
 	}

 	return config, doc, content, nil
 }

+// parserState carries the mutable state shared between node visits of the AST walk in RenderAST.
 type parserState struct {
 	nextCodeIsAppendix bool
 	appendixTitle      string
+	listStack          []listFrame // one frame per open *ast.List: pushed on entering, popped on leaving
 }

+// listFrame tracks the type and item counter for one list nesting level.
+type listFrame struct {
+	ordered bool // copied from ast.List.IsOrdered() when the list is entered
+	index   int  // incremented before each item is rendered, so the first item gets 1; NOTE(review): always starts from zero, a list's own start number is not consulted
+}
+
+// RenderAST walks the Goldmark AST and dispatches to IHKRenderer methods.
+//
+// Front-matter detection: while Roman numbering is active (r.numType ==
+// NumRoman), a level-1 heading whose title isFrontMatterSection accepts
+// stays in the front matter. Any other level-1 heading calls
+// StartMainBody(), which is expected to switch to Arabic page numbering.
 func RenderAST(doc ast.Node, content []byte, r *IHKRenderer) error {
 	r.StartFrontMatter()
 	state := &parserState{}

 	return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
-		if !entering {
-			return ast.WalkContinue, nil
-		}
-
 		switch node := n.(type) {
+
+		// ── Headings ──────────────────────────────────────────────────────────
 		case *ast.Heading:
+			if !entering {
+				return ast.WalkContinue, nil
+			}
+			title := extractPlainText(node, content)
 			if node.Level == 1 && r.numType == NumRoman {
-				title := extractText(node, content)
-				if title != "Vorwort" && title != "Abkürzungsverzeichnis" {
+				if !isFrontMatterSection(title) {
 					r.StartMainBody()
 				}
 			}
-			title := extractText(node, content)
 			r.RenderHeader(node.Level, title)
 			return ast.WalkSkipChildren, nil
+
+		// ── Paragraphs ────────────────────────────────────────────────────────
 		case *ast.Paragraph:
-			text := extractText(node, content)
-			lines := strings.Split(text, "\n")
-			isMeta := false
-			for _, line := range lines {
-				line = strings.TrimSpace(line)
-				if strings.HasPrefix(line, "@Quelle:") {
-					r.AddSource(strings.TrimPrefix(line, "@Quelle:"))
-					isMeta = true
-				} else if strings.HasPrefix(line, "@Anhang:") {
-					r.AddAppendix(strings.TrimPrefix(line, "@Anhang:"))
-					isMeta = true
-				} else if strings.HasPrefix(line, "@AnhangUML:") {
-					state.nextCodeIsAppendix = true
-					state.appendixTitle = strings.TrimSpace(strings.TrimPrefix(line, "@AnhangUML:"))
-					isMeta = true
-				}
+			if !entering {
+				return ast.WalkContinue, nil
 			}
-			if isMeta {
+			plain := extractPlainText(node, content)
+
+			// A paragraph with at least one @-directive line is consumed whole; its other lines are not rendered.
+			if handled := handleDirectives(plain, state, r); handled {
 				return ast.WalkSkipChildren, nil
 			}
-			r.RenderParagraph(text)
+
+			spans := extractInlineSpans(node, content)
+			r.RenderParagraphSpans(spans)
 			return ast.WalkSkipChildren, nil
+
+		// ── Fenced code blocks ────────────────────────────────────────────────
 		case *ast.FencedCodeBlock:
+			if !entering {
+				return ast.WalkContinue, nil
+			}
 			lang := string(node.Language(content))
-			code := extractCode(node, content)
+			code := extractCodeBlock(node, content)
+
 			if lang == "mermaid" || lang == "plantuml" || lang == "puml" {
 				imgPath, err := RenderDiagramViaKroki(lang, code)
 				if err == nil {
+					caption := lang
 					if state.nextCodeIsAppendix {
 						r.AddAppendix(state.appendixTitle + " | " + imgPath)
 						state.nextCodeIsAppendix = false
 					} else {
-						r.RenderImage(imgPath, "Diagramm: "+lang)
+						r.RenderImage(imgPath, "Diagram ("+caption+")")
 					}
 					return ast.WalkSkipChildren, nil
 				}
+				// On error, fall through and render the source as a plain code block. NOTE(review): nextCodeIsAppendix is not cleared on this path — confirm a pending @AnhangUML should carry over to the next diagram.
 			}
-		case *ast.Image:
-			imgPath := string(node.Destination)
-			title := string(node.Title)
-			r.RenderImage(imgPath, title)
+			// Non-diagram blocks and diagrams that failed to render are emitted as a single code-styled span.
+			r.RenderParagraphSpans([]InlineSpan{{Text: code, Code: true}})
 			return ast.WalkSkipChildren, nil
+
+		// ── Images ────────────────────────────────────────────────────────────
+		case *ast.Image:
+			if !entering {
+				return ast.WalkContinue, nil
+			}
+			imgPath := string(node.Destination)
+			caption := extractPlainText(node, content) // text of the image node's children (presumably the alt text)
+			if caption == "" {
+				caption = string(node.Title) // fall back to the image title
+			}
+			r.RenderImage(imgPath, caption)
+			return ast.WalkSkipChildren, nil
+
+		// ── Block quotes starting with "Quelle:" / "Source:" are recorded as sources ──
 		case *ast.Blockquote:
-			// Check if first paragraph starts with "Quelle:"
+			if !entering {
+				return ast.WalkContinue, nil
+			}
 			first := node.FirstChild()
 			if first != nil {
 				if para, ok := first.(*ast.Paragraph); ok {
-					pText := extractText(para, content)
+					pText := extractPlainText(para, content)
 					if strings.HasPrefix(pText, "Quelle:") || strings.HasPrefix(pText, "Source:") {
-						sourceText := strings.TrimPrefix(pText, "Quelle:")
-						sourceText = strings.TrimPrefix(sourceText, "Source:")
-						r.AddSource(sourceText)
+						src := strings.TrimPrefix(strings.TrimPrefix(pText, "Quelle:"), "Source:")
+						r.AddSource(src)
 						return ast.WalkSkipChildren, nil
 					}
 				}
 			}
+			return ast.WalkContinue, nil // any other block quote: descend so its children are handled by the cases above
+
+		// ── Lists ─────────────────────────────────────────────────────────────
 		case *ast.List:
-			// Items will be handled by ListItem
+			if entering {
+				state.listStack = append(state.listStack, listFrame{ordered: node.IsOrdered()})
+			} else {
+				if len(state.listStack) > 0 {
+					state.listStack = state.listStack[:len(state.listStack)-1]
+				}
+			}
+			return ast.WalkContinue, nil
+
 		case *ast.ListItem:
-			text := extractText(node, content)
-			r.RenderListItem(text, true, 0) // Basic bullet point for now
+			if !entering {
+				return ast.WalkContinue, nil
+			}
+			depth := len(state.listStack) - 1 // NOTE(review): items skip their children below, so a list nested inside an item is presumably never entered and depth stays 0 — confirm
+			if depth < 0 {
+				depth = 0
+			}
+			frame := &state.listStack[depth] // NOTE(review): would panic on an empty stack; relies on a ListItem always being visited inside a List
+			frame.index++
+			spans := extractInlineSpansFromListItem(node, content) // spans of the item's first child block only
+			r.RenderListItem(spans, frame.ordered, frame.index, depth)
 			return ast.WalkSkipChildren, nil
+
+		// ── Tables ────────────────────────────────────────────────────────────
 		case *extast.Table:
+			if !entering {
+				return ast.WalkContinue, nil
+			}
 			var tableData [][]string
 			for row := node.FirstChild(); row != nil; row = row.NextSibling() {
 				var rowData []string
 				for cell := row.FirstChild(); cell != nil; cell = cell.NextSibling() {
-					rowData = append(rowData, extractText(cell, content))
+					rowData = append(rowData, extractPlainText(cell, content))
 				}
 				tableData = append(tableData, rowData)
 			}
-			r.RenderTable(tableData)
+			r.RenderTable(tableData, "")
 			return ast.WalkSkipChildren, nil
 		}
+
 		return ast.WalkContinue, nil
 	})
 }

-func extractText(n ast.Node, content []byte) string {
-	var textStr string
-	for child := n.FirstChild(); child != nil; child = child.NextSibling() {
-		if textNode, ok := child.(*ast.Text); ok {
-			textStr += string(textNode.Segment.Value(content))
-			if textNode.HardLineBreak() || textNode.SoftLineBreak() {
-				textStr += "\n"
-			}
-		} else {
-			textStr += extractText(child, content)
-		}
+// isFrontMatterSection reports whether title, with surrounding whitespace
+// trimmed, exactly matches (case-sensitively) one of the front-matter section names below.
+func isFrontMatterSection(title string) bool {
+	switch strings.TrimSpace(title) {
+	case "Vorwort", "Einleitung", "Abkürzungsverzeichnis": // NOTE(review): "Einleitung" was not part of the previous hard-coded check — confirm it belongs to the front matter
+		return true
 	}
-	if textStr == "" {
-		// Fallback for simple nodes
-		return string(n.Text(content))
-	}
-	return textStr
+	// Any other title (including numbered sections like "1. Problem Statement")
+	// is reported as not belonging to the front matter.
+	return false
 }

-func extractCode(n *ast.FencedCodeBlock, content []byte) string {
+// handleDirectives scans text line by line for the @Quelle:, @Anhang: and @AnhangUML: control prefixes.
+// Returns true if at least one line was a directive; non-directive lines are ignored and do not affect the result.
+func handleDirectives(text string, state *parserState, r *IHKRenderer) bool {
+	lines := strings.Split(text, "\n")
+	handled := false
+	for _, line := range lines {
+		line = strings.TrimSpace(line) // prefixes are matched after trimming, so indented directives are recognised
+		switch {
+		case strings.HasPrefix(line, "@Quelle:"):
+			r.AddSource(strings.TrimSpace(strings.TrimPrefix(line, "@Quelle:")))
+			handled = true
+		case strings.HasPrefix(line, "@Anhang:"):
+			r.AddAppendix(strings.TrimSpace(strings.TrimPrefix(line, "@Anhang:")))
+			handled = true
+		case strings.HasPrefix(line, "@AnhangUML:"):
+			state.nextCodeIsAppendix = true // only recorded in state here; nothing is sent to the renderer for this directive
+			state.appendixTitle = strings.TrimSpace(strings.TrimPrefix(line, "@AnhangUML:"))
+			handled = true
+		}
+	}
+	return handled
+}
+
+// extractPlainText returns the plain-text content of an AST node by
+// recursively concatenating the text of all descendant *ast.Text nodes, in document order.
+func extractPlainText(n ast.Node, content []byte) string {
+	var sb strings.Builder
+	for child := n.FirstChild(); child != nil; child = child.NextSibling() {
+		if t, ok := child.(*ast.Text); ok {
+			sb.Write(t.Segment.Value(content))
+			if t.SoftLineBreak() || t.HardLineBreak() { // soft and hard breaks both become a newline
+				sb.WriteByte('\n')
+			}
+		} else {
+			sb.WriteString(extractPlainText(child, content))
+		}
+	}
+	if sb.Len() == 0 { // nothing collected from children: fall back to the node's own Text()
+		return string(n.Text(content))
+	}
+	return sb.String()
+}
+
+// extractInlineSpans collects the inline content below n into a slice of InlineSpan values
+// by calling walkInline with bold, italic and code all initially false; the result is nil when nothing is collected.
+func extractInlineSpans(n ast.Node, content []byte) []InlineSpan {
+	var spans []InlineSpan
+	walkInline(n, content, false, false, false, &spans)
+	return spans
+}
+
+// extractInlineSpansFromListItem extracts spans from the first child of a list item,
+// whatever its node type; later children are never inspected. Returns nil for an item without children.
+func extractInlineSpansFromListItem(item *ast.ListItem, content []byte) []InlineSpan {
+	for child := item.FirstChild(); child != nil; child = child.NextSibling() {
+		if _, ok := child.(*ast.Paragraph); ok {
+			return extractInlineSpans(child, content)
+		}
+		// Non-paragraph first child (e.g. the TextBlock of a tight list). NOTE(review): this returns unconditionally, so the loop never advances and the type check above is redundant.
+		return extractInlineSpans(child, content)
+	}
+	return nil
+}
+
+// walkInline recursively appends InlineSpan values for the subtree below n to out,
+// passing the bold/italic/code flags down; Emphasis nodes add bold or italic, CodeSpan nodes set code.
+func walkInline(n ast.Node, content []byte, bold, italic, code bool, out *[]InlineSpan) {
+	for child := n.FirstChild(); child != nil; child = child.NextSibling() {
+		switch c := child.(type) {
+		case *ast.Text:
+			seg := string(c.Segment.Value(content))
+			if c.SoftLineBreak() { // soft break: joined with a single space
+				seg += " "
+			} else if c.HardLineBreak() { // hard break: kept as a newline
+				seg += "\n"
+			}
+			if seg != "" {
+				*out = append(*out, InlineSpan{Text: seg, Bold: bold, Italic: italic, Code: code})
+			}
+		case *ast.Emphasis:
+			childBold := bold || c.Level == 2 // level 2 turns bold on for the subtree
+			childItalic := italic || c.Level == 1 // level 1 turns italic on for the subtree
+			walkInline(c, content, childBold, childItalic, code, out)
+		case *ast.CodeSpan:
+			raw := string(c.Text(content))
+			*out = append(*out, InlineSpan{Text: raw, Bold: bold, Italic: italic, Code: true})
+		case *ast.Link:
+			// Only the link's child content is collected; the destination is dropped. Currently identical to the default case.
+			walkInline(c, content, bold, italic, code, out)
+		default:
+			walkInline(c, content, bold, italic, code, out)
+		}
+	}
+}
+
+// extractCodeBlock returns the raw source text of a fenced code block.
+func extractCodeBlock(n *ast.FencedCodeBlock, content []byte) string {
 	var buf bytes.Buffer
 	for i := 0; i < n.Lines().Len(); i++ {
 		line := n.Lines().At(i)
@@ -178,45 +316,3 @@ func extractCode(n *ast.FencedCodeBlock, content []byte) string {
 	}
 	return buf.String()
 }
-
-func RenderDiagramViaKroki(lang string, code string) (string, error) {
-	if lang == "puml" {
-		lang = "plantuml"
-	}
-
-	// Kroki encoding: zlib + base64url
-	var b bytes.Buffer
-	w := zlib.NewWriter(&b)
-	w.Write([]byte(code))
-	w.Close()
-
-	encoded := base64.URLEncoding.EncodeToString(b.Bytes())
-	url := fmt.Sprintf("https://kroki.io/%s/png/%s", lang, encoded)
-
-	// Cache based on hash
-	hash := fmt.Sprintf("%x", sha256.Sum256([]byte(code)))
-	cachePath := filepath.Join(os.TempDir(), "ihk_cache_"+hash+".png")
-
-	if _, err := os.Stat(cachePath); err == nil {
-		return cachePath, nil
-	}
-
-	resp, err := http.Get(url)
-	if err != nil {
-		return "", err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != 200 {
-		return "", fmt.Errorf("kroki error: %d", resp.StatusCode)
-	}
-
-	out, err := os.Create(cachePath)
-	if err != nil {
-		return "", err
-	}
-	defer out.Close()
-
-	_, err = io.Copy(out, resp.Body)
-	return cachePath, err
-}