-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmdcheck.mjs
157 lines (123 loc) · 4.26 KB
/
mdcheck.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!node
// Link Checker for .md files
import * as fs from 'fs';
import path from 'node:path';
/// Folder where the scan starts; it is handed to walk() by the main script.
const rootFolder = '.';
/// Directory names that walk() never descends into.
const folderExclude = ['node_modules'];
/// File extensions collected by walk(). Only the .md files are parsed for links;
/// the other types are collected so links pointing at them can be verified.
const extensionsInclude = ['.md', '.svg', '.jpg', '.png', '.pdf'];
/// When truthy, debug() forwards its arguments to console.debug.
const debugLevel = true;
// Resolved paths of every collected file; assigned by the main script after walk() finished.
let allFiles = [];
// Number of internal links whose target was not found in allFiles.
let countBroken = 0;
// Number of http:// and https:// links seen; these are counted but never checked.
let externalLinks = 0;
/// Print diagnostic output, but only while the module-level `debugLevel` switch is truthy.
/// All arguments are forwarded unchanged to console.debug.
/// @param {...*} args values to print
function debug(...args) {
  if (debugLevel) console.debug(...args);
}
/// Print a message for the user; the value is passed unchanged to console.log.
/// @param {*} v value to print
function info(v) {
console.log(v);
}
// Banner, printed once at module start before any folder is scanned.
info('Markdown Internal Link Checker');
/// Collect the paths of all relevant files below a folder.
/// The built-in recursive readdir option is deliberately not used: filtering on
/// directory name and file extension while descending avoids scanning excluded
/// trees and keeps the result small.
/// Sub-folders are scanned concurrently. Every scanned sub-folder contributes one
/// nested array to the result, so callers have to flatten it.
/// @param {String} folderName folder to scan
/// @returns {Promise<Array>} file paths of this folder, plus one nested array per scanned sub-folder
async function walk(folderName) {
  const entries = await fs.promises.readdir(folderName, { withFileTypes: true });
  const pending = [];

  for (const entry of entries) {
    const childPath = path.join(folderName, entry.name);

    if (entry.isDirectory()) {
      // recursion on included subdirectories only; the promise is awaited below.
      if (!folderExclude.includes(entry.name)) {
        pending.push(walk(childPath));
      }
      continue;
    }

    if (extensionsInclude.includes(path.extname(entry.name))) {
      pending.push(childPath);
    }
  }

  // Plain strings pass through Promise.all unchanged, sub-folder promises are resolved.
  return Promise.all(pending);
}
/// Blank out every line that must not be searched for links: fenced code blocks
/// (``` at line start) and HTML comments (<!-- at line start), including the
/// marker lines themselves. Lines are blanked, not removed, so indexes still
/// match the line numbers of the file.
/// @param {String[]} lines raw lines of the markdown file
/// @returns {String[]} same number of lines, hidden ones replaced by ''
function blankHiddenLines(lines) {
  let insideCode = false;
  let insideComment = false;

  return lines.map((line) => {
    if (insideComment) {
      // Fence markers inside a comment are ignored; only the end marker matters.
      if (line.includes('-->')) insideComment = false;
      return '';
    }
    if (line.startsWith('```')) {
      insideCode = !insideCode;
      return '';
    }
    if (insideCode) {
      // A '<!--' shown inside a code sample does not open a comment.
      return '';
    }
    if (line.startsWith('<!--')) {
      // A comment closed on its own line must not hide the lines that follow it.
      insideComment = !line.includes('-->');
      return '';
    }
    return line;
  });
}

/// Decode the percent-escapes of a link target.
/// A malformed escape (e.g. a literal '%' in a file name) makes decodeURIComponent
/// throw a URIError; the raw text is used in that case so one odd link cannot
/// abort the whole run.
/// @param {String} target link target as written in the markdown file
/// @returns {String} decoded target, or the unchanged target when it cannot be decoded
function decodeLinkTarget(target) {
  try {
    return decodeURIComponent(target);
  } catch (err) {
    if (err instanceof URIError) return target;
    throw err;
  }
}

/// Simple markdown file parser to find links and picture references in the format
/// [txt](link) and report every internal target that is not listed in allFiles.
///
/// - http:// and https:// targets are only counted in externalLinks.
/// - Targets containing '{{' (variables) and empty targets (pure #anchors) are skipped.
/// - A target starting with '/' is resolved against the current working directory,
///   any other target against the folder of the markdown file.
///
/// Broken links are printed with console.log (file name once, then one line per
/// link) and counted in countBroken.
/// @param {String} fName path of the markdown file to check
function parseMarkdown(fName) {
  let fNameReported = false;
  const fldr = path.dirname(fName);

  // get file content as text, tolerate Windows line endings
  const rawLines = fs.readFileSync(fName, { encoding: 'utf-8' }).replace(/\r/g, '').split('\n');
  const textLines = blankHiddenLines(rawLines);

  textLines.forEach((lineText, lineNumber) => {
    for (const link of lineText.matchAll(/!?\[[^\]]+\]\(([^")]+)/g)) {
      // Only the file part is verified, the #fragment is dropped.
      const target = link[1].trim().split('#')[0];

      if (target.startsWith('http://') || target.startsWith('https://')) {
        // do not check internet links
        externalLinks++;
        continue;
      }
      if (target.includes('{{') || target.length === 0) {
        // links with variables and empty internal links cannot be checked
        continue;
      }

      const decoded = decodeLinkTarget(target);
      const fullTarget = decoded.startsWith('/')
        ? path.resolve(decoded.slice(1))
        : path.resolve(fldr, decoded);

      if (allFiles.includes(fullTarget)) {
        continue;
      }

      if (!fNameReported) {
        console.log(`${fName}`);
        fNameReported = true;
      }
      console.log(` ${lineNumber + 1}: Broken link: (${target}) to (${fullTarget})`);
      countBroken++;
    }
  });

  if (fNameReported) {
    // blank line separates the reports of two files
    console.log("");
  }
} // parseMarkdown()
// Main script: collect all relevant files, check every markdown file, print totals.
debug("root:", rootFolder, path.resolve(rootFolder));

// walk() returns nested arrays (one per sub-folder); flatten them and resolve every
// path so it can be compared with the resolved link targets.
const collected = await walk(rootFolder);
allFiles = collected
  .flat(Number.POSITIVE_INFINITY)
  .map((entryPath) => path.resolve(entryPath));
console.log("");

// Only markdown files are parsed; files whose name starts with '_' are skipped.
for (const candidate of allFiles) {
  const isMarkdown = path.extname(candidate) === '.md';
  const isSkipped = path.basename(candidate).startsWith('_');
  if (isMarkdown && !isSkipped) {
    parseMarkdown(candidate);
  }
}

// Summary, counters right-aligned to four characters.
console.log("");
console.log(` ${countBroken.toString().padStart(4, ' ')} Broken Links`);
console.log(` ${externalLinks.toString().padStart(4, ' ')} External Links`);