electronjs.org-new/scripts/tasks/md-fixers.ts

//@ts-check

import fs from 'fs-extra';
import path from 'path';
import globby from 'globby';

import logger from '@docusaurus/logger';

/**
 * RegExp use to match the old markdown format for fiddle
 * in `fiddleTransformer`.
 */
const fiddleRegex = /^```javascript fiddle='docs\/(\S+)?'$/;
const fiddlePathFixRegex = /```fiddle docs\//;

/**
 * Updates the markdown fiddle format from:
 * ```
 * ```javascript fiddle='docs/fiddles/screen/fit-screen'
 * ```
 * To
 * ```
 * ```fiddle docs/latest/fiddles/example
 * ```
 * @param line
 */
const fiddleTransformer = (line: string) => {
  const matches = fiddleRegex.exec(line);
  const hasNewPath = fiddlePathFixRegex.test(line);

  if (matches) {
    return `\`\`\`fiddle docs/latest/${matches[1]}`;
  } else if (hasNewPath) {
    return (
      line
        .replace(fiddlePathFixRegex, '```fiddle docs/latest/')
        // we could have a double transformation if the path is already the good one
        // this happens especially with the i18n content
        .replace('latest/latest', 'latest')
    );
  } else {
    return line;
  }
};

/**
 * Crowdin translations put markdown content right
 * after HTML comments and thus breaking Docusaurus
 * parse engine. We need to add a new EOL after `-->`
 * is found.
 * @param line
 */
const newLineOnHTMLComment = (line: string) => {
  // The `startsWith('*')` part is to prevent messing the document `api/native-theme.md` 😓
  if (line.includes('-->') && !line.endsWith('-->') && !line.startsWith('*')) {
    return line.replace('-->', '-->\n');
  }
  return line;
};

/**
 * Crowdin needs extra blank lines surrounding the admonition characters so it doesn't
 * break Docusaurus with the translated content.
 * @param line
 */
const newLineOnAdmonition = (line: string) => {
  if (line.trim().startsWith(':::') || line.trim().endsWith(':::')) {
    return `\n${line.trim()}\n`;
  }

  return line;
};

/**
 * MDX requires </details> tag to be on its own line for some reason.
 * @param line
 */
const newLineOnDetails = (line: string) => {
  if (line.trim().endsWith(' </details>')) {
    const restOfContent = line.trim().split(' </details>')[0];
    return `${restOfContent}\n</details>`;
  }

  return line;
};

/**
 * MDX requires <img> tags to be closed (e.g. <img/>).
 * This fixer isn't perfect and only works for <img> tags that take up a whole line.
 * @param line
 */
const noUnclosedImageTags = (line: string) => {
  if (line.match(/^(<img[^>]+)(?<!\/)>$/)) {
    return `${line.slice(0, -1)}/>`;
  } else {
    return line;
  }
};

/**
 * Applies any transformation that can be executed line by line on
 * the document to make sure it is ready to be consumed by
 * docusaurus and our md extensions:
 * * Fix types on regular text
 * * Update the fiddle format
 * @param doc
 */
const transform = (doc: string) => {
  const lines = doc.split('\n');
  const newDoc = [];
  const transformers = [
    fiddleTransformer,
    newLineOnHTMLComment,
    newLineOnAdmonition,
    newLineOnDetails,
    noUnclosedImageTags,
  ];

  for (const line of lines) {
    const newLine = transformers.reduce((newLine, transformer) => {
      return transformer(newLine);
    }, line);

    newDoc.push(newLine);
  }

  return newDoc.join('\n');
};

/**
 * Does a best effort to fix internal links
 * @param content
 * @param linksMaps
 */
const fixLinks = (content: string, linksMaps: Map<string, string>) => {
  /**
   * This regex should match the following examples:
   * * [link label]: ./somewhere.md
   * * [link label]:../anywhere
   * * [link label]: nowhere
   * * [link](./somewhere.md)
   * * [link](../anywhere)
   * * [link](nowhere)
   * * [link](https://github.com/electron/electron/blob/HEAD/path-to-file/file.md)
   * * [link]: https://github.com/electron/electron/
   * * [link]:https://another.place/
   *
   * The 2nd group contains the link.
   * See https://regex101.com/r/i40SRL/1 for testing
   */
  let updatedContent = content;
  const mdLinkRegex = /(]:\s*|]\()(\S*?)?(?:\s|$|\))/gi;
  let val: RegExpExecArray;

  while ((val = mdLinkRegex.exec(content)) !== null) {
    const link = val[2];

    // Don't map links from outside the electron docs
    if (
      link.startsWith('https://') &&
      !link.includes('github.com/electron/electron/')
    ) {
      continue;
    }

    // Link could be `glossary.md#main-process` and we just need `glossary.md`
    const basename = path.basename(link);
    const parts = basename.split('#');
    const key = parts.shift();

    if (key && linksMaps.has(key)) {
      const newLink = [linksMaps.get(key), ...parts];
      const replacement = val[0].replace(val[2], newLink.join('#'));
      updatedContent = updatedContent.replace(val[0], replacement);
    }
  }

  /**
   * Docusaurus has a problem when the title of an image spawns multiple lines. E.g.:
   *
   * ```md
   * ![This is an
   * image](path/to/image)
   * ```
   *
   * Surprisingly, it has no problem with multiline regular links 🤷‍♂️
   * */
  const multilineImageTitle = /(?:!\[([^\]]+?)\])\(/gm;
  while ((val = multilineImageTitle.exec(updatedContent)) !== null) {
    const title = val[1];
    if (!title.includes('\n')) {
      continue;
    }
    const fixedTitle = title.replace(/\n/g, ' ');
    updatedContent = updatedContent.replace(val[1], fixedTitle);
  }

  return updatedContent;
};

/**
 * Removes unnecessary extra blank lines
 * @param content
 */
const fixReturnLines = (content: string) => {
  return content.replace(/\n\n(\n)+/g, '\n\n');
};

/**
 * Inline API structure content if a link URL query parameter is ?inline.
 *
 * This will place the content of the structure (minus the document header)
 * on the line following the link. If the line with the link is a list, the
 * inlined content will be indented so that it is the next level in the list.
 *
 * Fairly heavy on assumptions and heuristics about how the docs are laid out
 * so this code may be fragile to upstream changes.
 *
 * @param filePath
 * @param content
 */
const inlineApiStructures = async (filePath: string, content: string) => {
  // This is a modified version of the regex in `fixLinks`
  const inlineApiStructureRegex = /\[\S+(?:]\()((\S*?)\?inline)?(?:\s|$|\))/g;

  // This is from vscode-markdown-languageservice
  const linkDefinitionPattern =
    /^([\t ]*\[(?!\^)((?:\\\]|[^\]])+)\]:\s*)([^<]\S*|<[^>]+>)/gm;

  let updatedContent = content;

  for (const val of content.matchAll(inlineApiStructureRegex)) {
    const link = val[2];

    // Don't consider links from outside the electron docs
    if (
      link.startsWith('https://') &&
      !link.includes('github.com/electron/electron/')
    ) {
      continue;
    }

    logger.info(
      `Inlining API structure content for '${logger.green(
        link
      )}' in ${logger.green(filePath)}`
    );

    try {
      // Recursively inline to ensure all inline links have been inlined
      const apiStructureFilePath = path.join(path.dirname(filePath), link);
      let apiStructureContent = await inlineApiStructures(
        apiStructureFilePath,
        await fs.readFile(apiStructureFilePath, 'utf-8')
      );

      // Strip the header if there is one
      if (apiStructureContent.match(/^# /m)) {
        const headerIdx = apiStructureContent.match(/^# /m).index;
        const firstNewline = apiStructureContent.indexOf('\n', headerIdx);
        apiStructureContent = apiStructureContent.slice(
          apiStructureContent.indexOf('\n', firstNewline + 1) + 1
        );
      }

      const indexOfLineStart = updatedContent.lastIndexOf('\n', val.index) + 1;
      const indexOfLineEnd =
        val.index + updatedContent.slice(val.index).indexOf('\n');
      const line = updatedContent.slice(indexOfLineStart, indexOfLineEnd);

      // The line with the link is a list item
      if (line.trim().startsWith('*')) {
        const indentation = line.indexOf('*');

        if (![0, 2, 4, 6].includes(indentation)) {
          throw new Error(
            'Expected an indentation level of 0, 2, 4, or 6 for list item'
          );
        }

        // Assume list indentation is a multiple of 2, should be enforced by
        // upstream linter. Increase the indentation of the API structure
        // content by two spaces for the list of properties, which is presumed
        // to be the first block in the document after the header, which ends
        // when there's a blank line, or end of file
        let initialPropsSection = true;

        const lines = apiStructureContent.split('\n');
        apiStructureContent = lines
          .map((line) => {
            if (line.trim() === '') {
              initialPropsSection = false;
            }

            return initialPropsSection
              ? `${' '.repeat(indentation + 2)}${line}`
              : line;
          })
          .join('\n');
      }

      // Pull out any reference link definitions so they don't interfere
      // with list indentation when inlining the structure properties
      const apiStructureContentLines = apiStructureContent.split('\n');
      const referenceLinkDefinitions = apiStructureContentLines.filter((line) =>
        line.match(linkDefinitionPattern)
      );

      if (referenceLinkDefinitions.length) {
        apiStructureContent = apiStructureContentLines
          .filter((line) => !line.match(linkDefinitionPattern))
          .join('\n');
      }

      // Insert the API structure content
      const preContent = updatedContent.slice(0, indexOfLineEnd);
      const postContent = updatedContent.slice(indexOfLineEnd + 1);
      updatedContent =
        preContent + '\n' + apiStructureContent.trimEnd() + '\n' + postContent;

      // Replace the special link to strip off the ?inline query parameter
      updatedContent = updatedContent.replace(val[1], val[2]);

      // Place any reference links from API structure content at end
      if (referenceLinkDefinitions.length) {
        updatedContent =
          updatedContent + '\n' + referenceLinkDefinitions.join('\n') + '\n';
      }
    } catch (err) {
      logger.error(
        `Error inlining API structure link in file ${filePath}: ${err}`
      );
    }
  }

  return updatedContent;
};

/**
 * The current doc's format on `electron/electron` cannot be used
 * directly by docusaurus. This function transform all the md files
 * found in the given `root` (recursively) and makes sure they are
 * ready to consumed by the website.
 * @param root
 * @param version
 */
export const fixContent = async (root: string, version = 'latest') => {
  const files = await globby(`${version}/**/*.md`, {
    cwd: root,
  });

  /**
   * Filenames in Electron docs are usually unique so best effort
   * consist on using the filename (basename) to identify the right
   * place where it should point.
   */
  const linksMaps = new Map();
  for (const filePath of files) {
    linksMaps.set(path.basename(filePath), filePath);
  }

  for (const filePath of files) {
    const fullFilePath = path.join(root, filePath);
    const content = await fs.readFile(fullFilePath, 'utf-8');

    // Inline API structures first so all other fixes affect them
    let fixedContent = await inlineApiStructures(fullFilePath, content);

    fixedContent = transform(fixedContent);

    // These analyze the document globally instead of line by line,
    // thus why they cannot be part of `transform`
    fixedContent = fixReturnLines(fixLinks(fixedContent, linksMaps));

    await fs.writeFile(path.join(root, filePath), fixedContent, 'utf-8');
  }
};