Remove outdated toc_pages.txt, add new Go modules for IHK Chemnitz PDF rendering including diagrams, tables, and TOC functionality.

This commit is contained in:
Sebastian Unterschütz
2026-05-04 22:06:28 +02:00
parent e98f7efa52
commit 81745b5f48
23 changed files with 1532 additions and 809 deletions
+214 -118
View File
@@ -2,25 +2,22 @@ package main
import (
"bytes"
"compress/zlib"
"crypto/sha256"
"encoding/base64"
"fmt"
"os"
"strings"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark-meta"
meta "github.com/yuin/goldmark-meta"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
extast "github.com/yuin/goldmark/extension/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"gopkg.in/yaml.v3"
"io"
"net/http"
"os"
"path/filepath"
"strings"
)
// ParseMarkdown reads a Markdown file, extracts the YAML front matter into a
// Config and returns the parsed AST together with the raw source bytes.
func ParseMarkdown(mdPath string) (Config, ast.Node, []byte, error) {
content, err := os.ReadFile(mdPath)
if err != nil {
@@ -31,146 +28,287 @@ func ParseMarkdown(mdPath string) (Config, ast.Node, []byte, error) {
goldmark.WithExtensions(meta.Meta, extension.Table),
)
context := parser.NewContext()
doc := md.Parser().Parse(text.NewReader(content), parser.WithContext(context))
ctx := parser.NewContext()
doc := md.Parser().Parse(text.NewReader(content), parser.WithContext(ctx))
metaData := meta.Get(context)
// Convert metaData map to Config struct
metaData := meta.Get(ctx)
var config Config
yamlData, _ := yaml.Marshal(metaData)
err = yaml.Unmarshal(yamlData, &config)
if err != nil {
return Config{}, nil, nil, fmt.Errorf("error parsing metadata: %v", err)
raw, _ := yaml.Marshal(metaData)
if err = yaml.Unmarshal(raw, &config); err != nil {
return Config{}, nil, nil, fmt.Errorf("YAML front matter: %w", err)
}
return config, doc, content, nil
}
// parserState holds the transient state that is carried from one node to the
// next while the AST is being walked.
type parserState struct {
	// nextCodeIsAppendix is set by an "@AnhangUML:" directive. While it is
	// true, the next successfully rendered diagram code block is registered
	// as an appendix instead of being drawn inline, and the flag is cleared.
	nextCodeIsAppendix bool
	// appendixTitle is the title text taken from the "@AnhangUML:" directive.
	appendixTitle string
	// listStack holds one frame per currently open list, innermost last.
	listStack []listFrame
}
// listFrame records the list kind and the running item counter for one level
// of list nesting.
type listFrame struct {
	// ordered is true when the list is an ordered (numbered) list.
	ordered bool
	// index counts the items seen so far in this list. It is incremented
	// before each item is rendered, so the first item carries index 1.
	index int
}
// RenderAST walks the Goldmark AST and dispatches to IHKRenderer methods.
//
// Front-matter detection: a level-1 heading whose title is in the
// front-matter list ("Vorwort", "Einleitung", "Abkürzungsverzeichnis")
// stays in Roman-numeral territory. All other level-1 headings trigger
// StartMainBody() and switch to Arabic page numbering.
func RenderAST(doc ast.Node, content []byte, r *IHKRenderer) error {
r.StartFrontMatter()
state := &parserState{}
return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
return ast.WalkContinue, nil
}
switch node := n.(type) {
// ── Headings ──────────────────────────────────────────────────────────
case *ast.Heading:
if !entering {
return ast.WalkContinue, nil
}
title := extractPlainText(node, content)
if node.Level == 1 && r.numType == NumRoman {
title := extractText(node, content)
if title != "Vorwort" && title != "Abkürzungsverzeichnis" {
if !isFrontMatterSection(title) {
r.StartMainBody()
}
}
title := extractText(node, content)
r.RenderHeader(node.Level, title)
return ast.WalkSkipChildren, nil
// ── Paragraphs ────────────────────────────────────────────────────────
case *ast.Paragraph:
text := extractText(node, content)
lines := strings.Split(text, "\n")
isMeta := false
for _, line := range lines {
line = strings.TrimSpace(line)
if strings.HasPrefix(line, "@Quelle:") {
r.AddSource(strings.TrimPrefix(line, "@Quelle:"))
isMeta = true
} else if strings.HasPrefix(line, "@Anhang:") {
r.AddAppendix(strings.TrimPrefix(line, "@Anhang:"))
isMeta = true
} else if strings.HasPrefix(line, "@AnhangUML:") {
state.nextCodeIsAppendix = true
state.appendixTitle = strings.TrimSpace(strings.TrimPrefix(line, "@AnhangUML:"))
isMeta = true
}
if !entering {
return ast.WalkContinue, nil
}
if isMeta {
plain := extractPlainText(node, content)
// Special directives embedded in paragraphs
if handled := handleDirectives(plain, state, r); handled {
return ast.WalkSkipChildren, nil
}
r.RenderParagraph(text)
spans := extractInlineSpans(node, content)
r.RenderParagraphSpans(spans)
return ast.WalkSkipChildren, nil
// ── Fenced code blocks ────────────────────────────────────────────────
case *ast.FencedCodeBlock:
if !entering {
return ast.WalkContinue, nil
}
lang := string(node.Language(content))
code := extractCode(node, content)
code := extractCodeBlock(node, content)
if lang == "mermaid" || lang == "plantuml" || lang == "puml" {
imgPath, err := RenderDiagramViaKroki(lang, code)
if err == nil {
caption := lang
if state.nextCodeIsAppendix {
r.AddAppendix(state.appendixTitle + " | " + imgPath)
state.nextCodeIsAppendix = false
} else {
r.RenderImage(imgPath, "Diagramm: "+lang)
r.RenderImage(imgPath, "Diagram ("+caption+")")
}
return ast.WalkSkipChildren, nil
}
// Fall through: render as plain code block on error
}
case *ast.Image:
imgPath := string(node.Destination)
title := string(node.Title)
r.RenderImage(imgPath, title)
// Render non-diagram code blocks as monospace paragraphs
r.RenderParagraphSpans([]InlineSpan{{Text: code, Code: true}})
return ast.WalkSkipChildren, nil
// ── Images ────────────────────────────────────────────────────────────
case *ast.Image:
if !entering {
return ast.WalkContinue, nil
}
imgPath := string(node.Destination)
caption := extractPlainText(node, content)
if caption == "" {
caption = string(node.Title)
}
r.RenderImage(imgPath, caption)
return ast.WalkSkipChildren, nil
// ── Block quotes (alternative @Quelle syntax) ─────────────────────────
case *ast.Blockquote:
// Check if first paragraph starts with "Quelle:"
if !entering {
return ast.WalkContinue, nil
}
first := node.FirstChild()
if first != nil {
if para, ok := first.(*ast.Paragraph); ok {
pText := extractText(para, content)
pText := extractPlainText(para, content)
if strings.HasPrefix(pText, "Quelle:") || strings.HasPrefix(pText, "Source:") {
sourceText := strings.TrimPrefix(pText, "Quelle:")
sourceText = strings.TrimPrefix(sourceText, "Source:")
r.AddSource(sourceText)
src := strings.TrimPrefix(strings.TrimPrefix(pText, "Quelle:"), "Source:")
r.AddSource(src)
return ast.WalkSkipChildren, nil
}
}
}
return ast.WalkContinue, nil
// ── Lists ─────────────────────────────────────────────────────────────
case *ast.List:
// Items will be handled by ListItem
if entering {
state.listStack = append(state.listStack, listFrame{ordered: node.IsOrdered()})
} else {
if len(state.listStack) > 0 {
state.listStack = state.listStack[:len(state.listStack)-1]
}
}
return ast.WalkContinue, nil
case *ast.ListItem:
text := extractText(node, content)
r.RenderListItem(text, true, 0) // Basic bullet point for now
if !entering {
return ast.WalkContinue, nil
}
depth := len(state.listStack) - 1
if depth < 0 {
depth = 0
}
frame := &state.listStack[depth]
frame.index++
spans := extractInlineSpansFromListItem(node, content)
r.RenderListItem(spans, frame.ordered, frame.index, depth)
return ast.WalkSkipChildren, nil
// ── Tables ────────────────────────────────────────────────────────────
case *extast.Table:
if !entering {
return ast.WalkContinue, nil
}
var tableData [][]string
for row := node.FirstChild(); row != nil; row = row.NextSibling() {
var rowData []string
for cell := row.FirstChild(); cell != nil; cell = cell.NextSibling() {
rowData = append(rowData, extractText(cell, content))
rowData = append(rowData, extractPlainText(cell, content))
}
tableData = append(tableData, rowData)
}
r.RenderTable(tableData)
r.RenderTable(tableData, "")
return ast.WalkSkipChildren, nil
}
return ast.WalkContinue, nil
})
}
func extractText(n ast.Node, content []byte) string {
var textStr string
for child := n.FirstChild(); child != nil; child = child.NextSibling() {
if textNode, ok := child.(*ast.Text); ok {
textStr += string(textNode.Segment.Value(content))
if textNode.HardLineBreak() || textNode.SoftLineBreak() {
textStr += "\n"
}
} else {
textStr += extractText(child, content)
}
// isFrontMatterSection returns true for level-1 headings that belong to the
// Roman-numbered front matter (before the main body begins).
func isFrontMatterSection(title string) bool {
switch strings.TrimSpace(title) {
case "Vorwort", "Einleitung", "Abkürzungsverzeichnis":
return true
}
if textStr == "" {
// Fallback for simple nodes
return string(n.Text(content))
}
return textStr
// Everything else (including numbered sections like "1. Problem Statement")
// belongs to the Arabic-numbered main body.
return false
}
func extractCode(n *ast.FencedCodeBlock, content []byte) string {
// handleDirectives scans every line of a paragraph's plain text for the
// control prefixes "@Quelle:", "@Anhang:" and "@AnhangUML:".
//
//   - "@Quelle:" registers the trimmed remainder as a source on r.
//   - "@Anhang:" registers the trimmed remainder as an appendix on r.
//   - "@AnhangUML:" stores the trimmed remainder in state.appendixTitle and
//     sets state.nextCodeIsAppendix.
//
// It returns true when at least one line carried a directive; lines without
// a directive are ignored.
func handleDirectives(text string, state *parserState, r *IHKRenderer) bool {
	const (
		sourceTag      = "@Quelle:"
		appendixTag    = "@Anhang:"
		appendixUMLTag = "@AnhangUML:"
	)

	consumed := false
	for _, raw := range strings.Split(text, "\n") {
		entry := strings.TrimSpace(raw)

		if strings.HasPrefix(entry, sourceTag) {
			r.AddSource(strings.TrimSpace(strings.TrimPrefix(entry, sourceTag)))
			consumed = true
			continue
		}
		if strings.HasPrefix(entry, appendixTag) {
			r.AddAppendix(strings.TrimSpace(strings.TrimPrefix(entry, appendixTag)))
			consumed = true
			continue
		}
		if strings.HasPrefix(entry, appendixUMLTag) {
			// The title is remembered here; the flag tells the walker what to
			// do with the code block that follows.
			state.appendixTitle = strings.TrimSpace(strings.TrimPrefix(entry, appendixUMLTag))
			state.nextCodeIsAppendix = true
			consumed = true
		}
	}
	return consumed
}
// extractPlainText flattens the subtree below n into a single string.
// Text leaves contribute their source segment, followed by a newline when the
// leaf ends in a soft or hard line break; every other child is flattened
// recursively. If nothing was collected this way, the result of
// n.Text(content) is returned instead.
func extractPlainText(n ast.Node, content []byte) string {
	var out strings.Builder

	child := n.FirstChild()
	for child != nil {
		leaf, isText := child.(*ast.Text)
		if !isText {
			out.WriteString(extractPlainText(child, content))
		} else {
			out.Write(leaf.Segment.Value(content))
			if leaf.HardLineBreak() || leaf.SoftLineBreak() {
				out.WriteByte('\n')
			}
		}
		child = child.NextSibling()
	}

	if out.Len() > 0 {
		return out.String()
	}
	// No text was gathered from children; fall back to the node's own text.
	return string(n.Text(content))
}
// extractInlineSpans collects the inline content below n as a slice of
// InlineSpan values. The walk starts with bold, italic and code all switched
// off; walkInline turns them on as it descends into formatting nodes.
func extractInlineSpans(n ast.Node, content []byte) []InlineSpan {
	const off = false

	collected := []InlineSpan(nil)
	walkInline(n, content, off, off, off, &collected)
	return collected
}
// extractInlineSpansFromListItem returns the inline spans of a list item's
// own text.
//
// The item's text lives in a block child: an *ast.Paragraph or an
// *ast.TextBlock (the latter is used for tight lists). The first such child
// is used. If the item has neither, the spans of its first child are
// returned, and nil is returned for an item without children.
func extractInlineSpansFromListItem(item *ast.ListItem, content []byte) []InlineSpan {
	for child := item.FirstChild(); child != nil; child = child.NextSibling() {
		switch child.(type) {
		case *ast.Paragraph, *ast.TextBlock:
			return extractInlineSpans(child, content)
		}
	}
	// No text-bearing block found: fall back to whatever comes first so that
	// items consisting only of other block content still yield their text.
	if first := item.FirstChild(); first != nil {
		return extractInlineSpans(first, content)
	}
	return nil
}
// walkInline appends one InlineSpan per piece of inline text found below n
// to *out, in document order.
//
// bold, italic and code describe the formatting inherited from enclosing
// nodes. Emphasis nodes add italic (level 1) or bold (level 2) for their
// subtree, and code spans are always emitted with Code set. A soft line break
// after a text node becomes a trailing space, a hard line break a trailing
// newline.
func walkInline(n ast.Node, content []byte, bold, italic, code bool, out *[]InlineSpan) {
	child := n.FirstChild()
	for child != nil {
		switch node := child.(type) {
		case *ast.Text:
			piece := string(node.Segment.Value(content))
			switch {
			case node.SoftLineBreak():
				piece += " "
			case node.HardLineBreak():
				piece += "\n"
			}
			if len(piece) > 0 {
				*out = append(*out, InlineSpan{Text: piece, Bold: bold, Italic: italic, Code: code})
			}

		case *ast.CodeSpan:
			*out = append(*out, InlineSpan{
				Text:   string(node.Text(content)),
				Bold:   bold,
				Italic: italic,
				Code:   true,
			})

		case *ast.Emphasis:
			walkInline(node, content, bold || node.Level == 2, italic || node.Level == 1, code, out)

		default:
			// Links land here as well: only the link text is collected, the
			// destination is not emitted.
			walkInline(node, content, bold, italic, code, out)
		}
		child = child.NextSibling()
	}
}
// extractCodeBlock returns the raw source text of a fenced code block.
func extractCodeBlock(n *ast.FencedCodeBlock, content []byte) string {
var buf bytes.Buffer
for i := 0; i < n.Lines().Len(); i++ {
line := n.Lines().At(i)
@@ -178,45 +316,3 @@ func extractCode(n *ast.FencedCodeBlock, content []byte) string {
}
return buf.String()
}
// RenderDiagramViaKroki renders diagram source code to a PNG through the
// public kroki.io service and returns the path of the resulting image file.
//
// lang is the Kroki diagram type; "puml" is accepted as an alias for
// "plantuml". code is the diagram source.
//
// Results are cached in the OS temp directory under a name derived from the
// SHA-256 of code, so an identical diagram is fetched only once. The download
// is written to a temporary file and renamed into place only after it has
// been received completely, so an interrupted transfer can never leave a
// truncated image behind as a cache entry.
//
// On any failure the returned path is empty and the error is non-nil.
//
// NOTE(review): the request uses the default HTTP client, which has no
// timeout; consider a dedicated client if hanging requests are a concern.
func RenderDiagramViaKroki(lang string, code string) (string, error) {
	if lang == "puml" {
		lang = "plantuml"
	}

	// Look in the cache first; a hit needs neither encoding nor network.
	hash := fmt.Sprintf("%x", sha256.Sum256([]byte(code)))
	cachePath := filepath.Join(os.TempDir(), "ihk_cache_"+hash+".png")
	if info, err := os.Stat(cachePath); err == nil && !info.IsDir() && info.Size() > 0 {
		return cachePath, nil
	}

	// Kroki encoding: zlib + base64url.
	var payload bytes.Buffer
	zw := zlib.NewWriter(&payload)
	if _, err := zw.Write([]byte(code)); err != nil {
		return "", fmt.Errorf("compressing diagram source: %w", err)
	}
	if err := zw.Close(); err != nil {
		return "", fmt.Errorf("compressing diagram source: %w", err)
	}
	encoded := base64.URLEncoding.EncodeToString(payload.Bytes())
	url := fmt.Sprintf("https://kroki.io/%s/png/%s", lang, encoded)

	resp, err := http.Get(url)
	if err != nil {
		return "", fmt.Errorf("requesting diagram from kroki: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("kroki error: %d", resp.StatusCode)
	}

	// Download next to the final location so the rename stays on one
	// filesystem.
	tmp, err := os.CreateTemp(filepath.Dir(cachePath), "ihk_cache_*.tmp")
	if err != nil {
		return "", fmt.Errorf("creating diagram cache file: %w", err)
	}
	tmpPath := tmp.Name()

	if _, err := io.Copy(tmp, resp.Body); err != nil {
		tmp.Close()
		os.Remove(tmpPath)
		return "", fmt.Errorf("downloading diagram: %w", err)
	}
	if err := tmp.Close(); err != nil {
		os.Remove(tmpPath)
		return "", fmt.Errorf("writing diagram cache file: %w", err)
	}
	if err := os.Rename(tmpPath, cachePath); err != nil {
		os.Remove(tmpPath)
		return "", fmt.Errorf("storing diagram cache file: %w", err)
	}
	return cachePath, nil
}