Add basic HTML parser for converting strings to React components (#36071)

This commit is contained in:
Echo 2025-09-11 11:22:44 +02:00 committed by GitHub
commit 8a0d0025ff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 347 additions and 0 deletions

View file

@ -0,0 +1,69 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`html > htmlStringToComponents > copies attributes to props 1`] = `
[
<a
href="https://example.com"
rel="nofollow"
target="_blank"
>
link
</a>,
]
`;
exports[`html > htmlStringToComponents > handles nested elements 1`] = `
[
<p>
lorem
<strong>
ipsum
</strong>
</p>,
]
`;
exports[`html > htmlStringToComponents > ignores empty text nodes 1`] = `
[
<p>
<span>
lorem ipsum
</span>
</p>,
]
`;
exports[`html > htmlStringToComponents > respects allowedTags option 1`] = `
[
<p>
lorem
<em>
dolor
</em>
</p>,
]
`;
exports[`html > htmlStringToComponents > respects maxDepth option 1`] = `
[
<p>
<span />
</p>,
]
`;
exports[`html > htmlStringToComponents > returns converted nodes from string 1`] = `
[
<p>
lorem ipsum
</p>,
]
`;
exports[`html > htmlStringToComponents > uses default parsing if onElement returns undefined 1`] = `
[
<p>
lorem ipsum
</p>,
]
`;

View file

@ -1,3 +1,5 @@
import React from 'react';
import * as html from '../html';
describe('html', () => {
@ -9,4 +11,104 @@ describe('html', () => {
expect(output).toEqual('lorem\n\nipsum\n<br>');
});
});
describe('htmlStringToComponents', () => {
// Smoke test: a single paragraph string is converted and pinned by snapshot.
it('returns converted nodes from string', () => {
  const nodes = html.htmlStringToComponents('<p>lorem ipsum</p>');

  expect(nodes).toMatchSnapshot();
});
// A child element nested inside a parent must survive the conversion.
it('handles nested elements', () => {
  const nodes = html.htmlStringToComponents(
    '<p>lorem <strong>ipsum</strong></p>',
  );

  expect(nodes).toMatchSnapshot();
});
// The markup has whitespace-only text on both sides of the <span>;
// the snapshot records what remains after conversion.
it('ignores empty text nodes', () => {
  const markup = '<p> <span>lorem ipsum</span> </p>';

  expect(html.htmlStringToComponents(markup)).toMatchSnapshot();
});
// An anchor carrying href/target/rel attributes is converted and snapshotted.
it('copies attributes to props', () => {
  const anchorMarkup =
    '<a href="https://example.com" target="_blank" rel="nofollow">link</a>';

  const nodes = html.htmlStringToComponents(anchorMarkup);

  expect(nodes).toMatchSnapshot();
});
// Three levels of nesting are fed in with maxDepth set to 2; the snapshot
// pins how much of the tree is kept.
it('respects maxDepth option', () => {
  const deepMarkup = '<p><span>lorem <strong>ipsum</strong></span></p>';
  const options = { maxDepth: 2 };

  expect(html.htmlStringToComponents(deepMarkup, options)).toMatchSnapshot();
});
// The onText hook must be invoked exactly once, with the paragraph's text.
it('calls onText callback', () => {
  // Identity mock: records calls and hands the text back unchanged.
  const textSpy = vi.fn((text: string) => text);

  html.htmlStringToComponents('<p>lorem ipsum</p>', { onText: textSpy });

  expect(textSpy).toHaveBeenCalledExactlyOnceWith('lorem ipsum');
});
// The onElement hook must be invoked exactly once, receiving the <p> element
// and a children array that includes its text.
it('calls onElement callback', () => {
  const elementSpy = vi.fn(
    (element: HTMLElement, children: React.ReactNode[]) => {
      // Rebuild the element under its lowercase tag name with no props.
      const tag = element.tagName.toLowerCase();
      return React.createElement(tag, {}, ...children);
    },
  );

  html.htmlStringToComponents('<p>lorem ipsum</p>', { onElement: elementSpy });

  expect(elementSpy).toHaveBeenCalledExactlyOnceWith(
    expect.objectContaining({ tagName: 'P' }),
    expect.arrayContaining(['lorem ipsum']),
  );
});
// When onElement yields undefined the hook is still called once, and the
// resulting output is pinned by snapshot.
it('uses default parsing if onElement returns undefined', () => {
  const elementSpy = vi.fn(() => undefined);

  const nodes = html.htmlStringToComponents('<p>lorem ipsum</p>', {
    onElement: elementSpy,
  });

  expect(elementSpy).toHaveBeenCalledExactlyOnceWith(
    expect.objectContaining({ tagName: 'P' }),
    expect.arrayContaining(['lorem ipsum']),
  );
  expect(nodes).toMatchSnapshot();
});
// The onAttribute hook must fire once per attribute, receiving the attribute
// name, its value, and the lowercase tag name of the owning element.
it('calls onAttribute callback', () => {
  const anchorMarkup =
    '<a href="https://example.com" target="_blank" rel="nofollow">link</a>';
  const expectedAttributes: [string, string][] = [
    ['href', 'https://example.com'],
    ['target', '_blank'],
    ['rel', 'nofollow'],
  ];
  // Pass-through mock: returns the same name/value pair it was given.
  const attributeSpy = vi.fn(
    (name: string, value: string) =>
      [name, value] satisfies [string, string],
  );

  html.htmlStringToComponents(anchorMarkup, { onAttribute: attributeSpy });

  expect(attributeSpy).toHaveBeenCalledTimes(expectedAttributes.length);
  for (const [name, value] of expectedAttributes) {
    expect(attributeSpy).toHaveBeenCalledWith(name, value, 'a');
  }
});
// Only <p> and <em> are allowed here while the input also contains <strong>;
// the snapshot pins the filtered result.
it('respects allowedTags option', () => {
  const markup = '<p>lorem <strong>ipsum</strong> <em>dolor</em></p>';
  const allowedTags = new Set(['p', 'em']);

  const nodes = html.htmlStringToComponents(markup, { allowedTags });

  expect(nodes).toMatchSnapshot();
});
// Coarse performance guard: converting a paragraph with 1,000 <span>
// children must finish within a fixed wall-clock budget.
it('ensure performance is acceptable with large input', () => {
  // Arbitrary threshold of 200ms for this test.
  // Normally it's much less (<50ms), but the GH Action environment can be slow.
  const maxDurationMs = 200;
  const largeMarkup = `<p>${'<span>lorem</span>'.repeat(1_000)}</p>`;

  const startedAt = performance.now();
  html.htmlStringToComponents(largeMarkup);
  const elapsedMs = performance.now() - startedAt;

  expect(elapsedMs).toBeLessThan(maxDurationMs);
});
});
});