package main import ( "bytes" "fmt" "log" "os" "strings" "github.com/yuin/goldmark" meta "github.com/yuin/goldmark-meta" "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/extension" extast "github.com/yuin/goldmark/extension/ast" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/text" "gopkg.in/yaml.v3" ) // ParseMarkdown reads a Markdown file, extracts the YAML front matter into a // Config and returns the parsed AST together with the raw source bytes. func ParseMarkdown(mdPath string) (Config, ast.Node, []byte, error) { content, err := os.ReadFile(mdPath) if err != nil { return Config{}, nil, nil, err } md := goldmark.New( goldmark.WithExtensions(meta.Meta, extension.Table), ) ctx := parser.NewContext() doc := md.Parser().Parse(text.NewReader(content), parser.WithContext(ctx)) metaData := meta.Get(ctx) var config Config raw, _ := yaml.Marshal(metaData) if err = yaml.Unmarshal(raw, &config); err != nil { return Config{}, nil, nil, fmt.Errorf("YAML front matter: %w", err) } return config, doc, content, nil } // parserState tracks transient state during the AST walk. type parserState struct { nextCodeIsAppendix bool nextAppendixLandscape bool // set by @AnhangUMLQuer: — landscape for diagram appendix appendixTitle string nextCodeBlockAppendix bool // set by @AnhangCode: — next non-diagram code block → appendix codeBlockAppendixTitle string nextTableCaption string // set by @Tabelle: directive nextTableIsAppendix bool // set by @TabelleAnhang: or @TabelleAnhangQuer: nextTableIsLandscape bool // set by @TabelleAnhangQuer: nextDiagramLandscape bool // set by @DiagrammQuer: directive nextDiagramCaption string // caption for the landscape diagram page listStack []listFrame // stack for nested list tracking } // listFrame tracks the type and item counter for one list nesting level. type listFrame struct { ordered bool index int } // RenderAST walks the Goldmark AST and dispatches to IHKRenderer methods. // // Front-matter detection: a level-1 heading that is not a numbered section // and has a name in the front-matter list ("Vorwort", "Abkürzungsverzeichnis") // stays in Roman-numeral territory. All other level-1 headings trigger // StartMainBody() and switch to Arabic page numbering. func RenderAST(doc ast.Node, content []byte, r *IHKRenderer) error { r.StartFrontMatter() state := &parserState{} return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) { switch node := n.(type) { // ── Headings ────────────────────────────────────────────────────────── case *ast.Heading: if !entering { return ast.WalkContinue, nil } title := extractPlainText(node, content) if node.Level == 1 && r.numType == NumRoman { if !isFrontMatterSection(title) { r.StartMainBody() } } r.RenderHeader(node.Level, title) return ast.WalkSkipChildren, nil // ── Paragraphs ──────────────────────────────────────────────────────── case *ast.Paragraph: if !entering { return ast.WalkContinue, nil } plain := extractPlainText(node, content) // Special directives embedded in paragraphs if handled := handleDirectives(plain, state, r); handled { return ast.WalkSkipChildren, nil } spans := extractInlineSpans(node, content) r.RenderParagraphSpans(spans) return ast.WalkSkipChildren, nil // ── Fenced code blocks ──────────────────────────────────────────────── case *ast.FencedCodeBlock: if !entering { return ast.WalkContinue, nil } lang := string(node.Language(content)) code := extractCodeBlock(node, content) if lang == "mermaid" || lang == "plantuml" || lang == "puml" { imgPath, err := RenderDiagramViaKroki(lang, code) if err != nil { log.Printf("warning: diagram render failed (%s): %v — falling back to code block", lang, err) state.nextDiagramLandscape = false state.nextDiagramCaption = "" state.nextCodeIsAppendix = false state.nextAppendixLandscape = false } if err == nil { switch { case state.nextDiagramLandscape: r.RenderLandscapeDiagram(imgPath, state.nextDiagramCaption) state.nextDiagramLandscape = false state.nextDiagramCaption = "" case state.nextCodeIsAppendix: if state.nextAppendixLandscape { r.AddLandscapeAppendix(state.appendixTitle + " | " + imgPath) state.nextAppendixLandscape = false } else { r.AddAppendix(state.appendixTitle + " | " + imgPath) } state.nextCodeIsAppendix = false default: r.RenderImage(imgPath, "Diagram ("+lang+")") } return ast.WalkSkipChildren, nil } // Fall through: render as plain code block on error } if state.nextCodeBlockAppendix { r.AddCodeAppendix(state.codeBlockAppendixTitle, lang, code) state.nextCodeBlockAppendix = false state.codeBlockAppendixTitle = "" return ast.WalkSkipChildren, nil } // Render as a numbered code block (gutter + monospace body). r.RenderCodeBlock(lang, code) return ast.WalkSkipChildren, nil // ── Images ──────────────────────────────────────────────────────────── case *ast.Image: if !entering { return ast.WalkContinue, nil } imgPath := string(node.Destination) caption := extractPlainText(node, content) if caption == "" { caption = string(node.Title) } r.RenderImage(imgPath, caption) return ast.WalkSkipChildren, nil // ── Block quotes (alternative @Quelle syntax) ───────────────────────── case *ast.Blockquote: if !entering { return ast.WalkContinue, nil } first := node.FirstChild() if first != nil { if para, ok := first.(*ast.Paragraph); ok { pText := extractPlainText(para, content) if strings.HasPrefix(pText, "Quelle:") || strings.HasPrefix(pText, "Source:") { src := strings.TrimPrefix(strings.TrimPrefix(pText, "Quelle:"), "Source:") r.AddSource(src) return ast.WalkSkipChildren, nil } } } return ast.WalkContinue, nil // ── Lists ───────────────────────────────────────────────────────────── case *ast.List: if entering { state.listStack = append(state.listStack, listFrame{ordered: node.IsOrdered()}) } else { if len(state.listStack) > 0 { state.listStack = state.listStack[:len(state.listStack)-1] } // Add breathing room after the outermost list so the next // paragraph is not glued to the last bullet. if len(state.listStack) == 0 { r.pdf.Ln(dinSpaceAfterList) } } return ast.WalkContinue, nil case *ast.ListItem: if !entering { return ast.WalkContinue, nil } depth := len(state.listStack) - 1 if depth < 0 { depth = 0 } frame := &state.listStack[depth] frame.index++ spans := extractInlineSpansFromListItem(node, content) r.RenderListItem(spans, frame.ordered, frame.index, depth) return ast.WalkSkipChildren, nil // ── Tables ──────────────────────────────────────────────────────────── case *extast.Table: if !entering { return ast.WalkContinue, nil } var tableData [][]string for row := node.FirstChild(); row != nil; row = row.NextSibling() { var rowData []string for cell := row.FirstChild(); cell != nil; cell = cell.NextSibling() { rowData = append(rowData, extractPlainText(cell, content)) } tableData = append(tableData, rowData) } if state.nextTableIsAppendix { if state.nextTableIsLandscape { r.AddTableAppendixLandscape(state.nextTableCaption, tableData) state.nextTableIsLandscape = false } else { r.AddTableAppendix(state.nextTableCaption, tableData) } state.nextTableIsAppendix = false state.nextTableCaption = "" } else { caption := state.nextTableCaption state.nextTableCaption = "" r.RenderTable(tableData, caption) } return ast.WalkSkipChildren, nil } return ast.WalkContinue, nil }) } // isFrontMatterSection returns true for level-1 headings that belong to the // Roman-numbered front matter (before the main body begins). func isFrontMatterSection(title string) bool { switch strings.TrimSpace(title) { case "Vorwort", "Einleitung", "Abkürzungsverzeichnis": return true } // Everything else (including numbered sections like "1. Problem Statement") // belongs to the Arabic-numbered main body. return false } // handleDirectives processes special @-prefixed control lines in a paragraph. // Returns true if the paragraph was fully consumed as a directive. func handleDirectives(text string, state *parserState, r *IHKRenderer) bool { lines := strings.Split(text, "\n") handled := false for _, line := range lines { line = strings.TrimSpace(line) switch { case strings.HasPrefix(line, "@Quelle:"): r.AddSource(strings.TrimSpace(strings.TrimPrefix(line, "@Quelle:"))) handled = true case strings.HasPrefix(line, "@AnhangCode:"): state.nextCodeBlockAppendix = true state.codeBlockAppendixTitle = strings.TrimSpace(strings.TrimPrefix(line, "@AnhangCode:")) handled = true case strings.HasPrefix(line, "@Anhang:"): r.AddAppendix(strings.TrimSpace(strings.TrimPrefix(line, "@Anhang:"))) handled = true case strings.HasPrefix(line, "@AnhangBildQuer:"): r.AddLandscapeAppendix(strings.TrimSpace(strings.TrimPrefix(line, "@AnhangBildQuer:"))) handled = true case strings.HasPrefix(line, "@AnhangUMLQuer:"): state.nextCodeIsAppendix = true state.nextAppendixLandscape = true state.appendixTitle = strings.TrimSpace(strings.TrimPrefix(line, "@AnhangUMLQuer:")) handled = true case strings.HasPrefix(line, "@AnhangUML:"): state.nextCodeIsAppendix = true state.appendixTitle = strings.TrimSpace(strings.TrimPrefix(line, "@AnhangUML:")) handled = true case strings.HasPrefix(line, "@TabelleAnhangQuer:"): state.nextTableIsAppendix = true state.nextTableIsLandscape = true state.nextTableCaption = strings.TrimSpace(strings.TrimPrefix(line, "@TabelleAnhangQuer:")) handled = true case strings.HasPrefix(line, "@TabelleAnhang:"): state.nextTableIsAppendix = true state.nextTableCaption = strings.TrimSpace(strings.TrimPrefix(line, "@TabelleAnhang:")) handled = true case strings.HasPrefix(line, "@Tabelle:"): state.nextTableCaption = strings.TrimSpace(strings.TrimPrefix(line, "@Tabelle:")) handled = true case strings.HasPrefix(line, "@DiagrammQuer:"): state.nextDiagramLandscape = true state.nextDiagramCaption = strings.TrimSpace(strings.TrimPrefix(line, "@DiagrammQuer:")) handled = true } } return handled } // extractPlainText returns the plain-text content of an AST node by // recursively concatenating all text leaf nodes. func extractPlainText(n ast.Node, content []byte) string { var sb strings.Builder for child := n.FirstChild(); child != nil; child = child.NextSibling() { if t, ok := child.(*ast.Text); ok { sb.Write(t.Segment.Value(content)) if t.SoftLineBreak() || t.HardLineBreak() { sb.WriteByte('\n') } } else { sb.WriteString(extractPlainText(child, content)) } } if sb.Len() == 0 { return string(n.Text(content)) } return sb.String() } // extractInlineSpans walks the children of a paragraph node and builds a // slice of InlineSpan values that preserve bold, italic, and code formatting. func extractInlineSpans(n ast.Node, content []byte) []InlineSpan { var spans []InlineSpan walkInline(n, content, false, false, false, &spans) return spans } // extractInlineSpansFromListItem extracts spans from the first paragraph child // of a list item, which is how Goldmark represents list item content. func extractInlineSpansFromListItem(item *ast.ListItem, content []byte) []InlineSpan { for child := item.FirstChild(); child != nil; child = child.NextSibling() { if _, ok := child.(*ast.Paragraph); ok { return extractInlineSpans(child, content) } // TextBlock is used for tight lists return extractInlineSpans(child, content) } return nil } // walkInline recursively collects InlineSpan values from an AST subtree, // propagating bold/italic context down through Emphasis nodes. func walkInline(n ast.Node, content []byte, bold, italic, code bool, out *[]InlineSpan) { for child := n.FirstChild(); child != nil; child = child.NextSibling() { switch c := child.(type) { case *ast.Text: seg := string(c.Segment.Value(content)) if c.SoftLineBreak() { seg += " " } else if c.HardLineBreak() { seg += "\n" } if seg != "" { *out = append(*out, InlineSpan{Text: seg, Bold: bold, Italic: italic, Code: code}) } case *ast.Emphasis: childBold := bold || c.Level == 2 childItalic := italic || c.Level == 1 walkInline(c, content, childBold, childItalic, code, out) case *ast.CodeSpan: raw := string(c.Text(content)) *out = append(*out, InlineSpan{Text: raw, Bold: bold, Italic: italic, Code: true}) case *ast.Link: // Render link text; the href is not shown (no footnote support yet). walkInline(c, content, bold, italic, code, out) default: walkInline(c, content, bold, italic, code, out) } } } // extractCodeBlock returns the raw source text of a fenced code block. func extractCodeBlock(n *ast.FencedCodeBlock, content []byte) string { var buf bytes.Buffer for i := 0; i < n.Lines().Len(); i++ { line := n.Lines().At(i) buf.Write(line.Value(content)) } return buf.String() }