test: add tests for file-loaders packages (#9678)

add tests
This commit is contained in:
Arvin Xu
2025-10-12 13:43:08 +02:00
committed by GitHub
parent d0e2acac13
commit 3cf7df5748
9 changed files with 226 additions and 1 deletions

View File

@@ -4,7 +4,6 @@ import { beforeEach, describe, expect, it } from 'vitest';
import type { FileLoaderInterface } from '../../types'; import type { FileLoaderInterface } from '../../types';
import { DocxLoader } from './index'; import { DocxLoader } from './index';
// Make sure a test.docx file has been placed in the fixtures directory
const fixturePath = (filename: string) => path.join(__dirname, `./fixtures/${filename}`); const fixturePath = (filename: string) => path.join(__dirname, `./fixtures/${filename}`);
let loader: FileLoaderInterface; let loader: FileLoaderInterface;

View File

@@ -22,6 +22,36 @@ exports[`ExcelLoader > should aggregate content correctly (joining sheets) > agg
</sheet>" </sheet>"
`; `;
exports[`ExcelLoader > should handle Excel file with only headers > only_header_pages 1`] = `
[
{
"charCount": 124,
"lineCount": 3,
"metadata": {
"sheetName": "表1",
},
"pageContent": "| 表格 1 | __EMPTY | __EMPTY_1 | __EMPTY_2 | __EMPTY_3 |
| --- | --- | --- | --- | --- |
| header1 | header2 | header3 | | |",
},
{
"charCount": 231,
"lineCount": 8,
"metadata": {
"sheetName": "表2 - 表格 2",
},
"pageContent": "| 表格 2 | __EMPTY | __EMPTY_1 | __EMPTY_2 | __EMPTY_3 |
| --- | --- | --- | --- | --- |
| | 类别 A | 类别 B | | |
| 项目 1 | 5 | 7 | | |
| 项目 2 | 10 | 8 | | |
| 项目 3 | 9 | 15 | | |
| 项目 4 | 7 | 12 | | |
| 项目 5 | 16 | 21 | | |",
},
]
`;
exports[`ExcelLoader > should load pages correctly from an Excel file (one page per sheet) 1`] = ` exports[`ExcelLoader > should load pages correctly from an Excel file (one page per sheet) 1`] = `
[ [
{ {

View File

@@ -44,4 +44,12 @@ describe('ExcelLoader', () => {
expect(pages[0].pageContent).toBe(''); expect(pages[0].pageContent).toBe('');
expect(pages[0].metadata.error).toContain('Failed to load Excel file'); expect(pages[0].metadata.error).toContain('Failed to load Excel file');
}); });
it('should handle Excel file with only headers', async () => {
  // Load the 'only-header.xlsx' fixture through the loader under test.
  const pages = await loader.loadPages(fixturePath('only-header.xlsx'));

  // At least one page must come back, and the first one should contain the header content.
  expect(pages.length).toBeGreaterThan(0);
  expect(pages[0].pageContent).toBeTruthy();

  // Pin the complete page list under an explicitly named snapshot.
  expect(pages).toMatchSnapshot('only_header_pages');
});
}); });

View File

@@ -44,4 +44,29 @@ describe('PptxLoader', () => {
expect(pages[0].pageContent).toBe(''); expect(pages[0].pageContent).toBe('');
expect(pages[0].metadata.error).toContain('Failed to load or process PPTX file:'); // Update error message check expect(pages[0].metadata.error).toContain('Failed to load or process PPTX file:'); // Update error message check
}); });
it('should handle corrupted slide XML', async () => {
  const pages = await loader.loadPages(fixturePath('corrupted-slides.pptx'));

  // Exactly one page is expected: empty content plus an error message in its metadata.
  expect(pages).toHaveLength(1);
  const errorPage = pages[0];
  expect(errorPage.pageContent).toBe('');
  expect(errorPage.metadata.error).toContain('All slides failed to parse correctly');
});
it('should handle aggregateContent with all error pages', async () => {
  // Feed the pages loaded from the corrupted fixture straight into aggregateContent.
  const errorPages = await loader.loadPages(fixturePath('corrupted-slides.pptx'));
  const aggregated = await loader.aggregateContent(errorPages);

  // An empty string is returned when every page is an error page.
  expect(aggregated).toBe('');
});
it('should handle empty PPTX file with no slides', async () => {
  // Message fragment the loader is expected to report for a deck without slides.
  const expectedError =
    'No slides found. The PPTX file might be empty, corrupted, or does not contain standard slide XMLs.';

  const pages = await loader.loadPages(fixturePath('empty-slides.pptx'));

  // A single placeholder page with empty content and the error in its metadata.
  expect(pages).toHaveLength(1);
  const onlyPage = pages[0];
  expect(onlyPage.pageContent).toBe('');
  expect(onlyPage.metadata.error).toContain(expectedError);
});
}); });

View File

@@ -31,6 +31,20 @@ describe('parser-utils', () => {
); );
}); });
it('should handle corrupted file error with Buffer input', async () => {
  // Stand-in for yauzl whose fromBuffer reports a failure through its callback right away.
  const failingFromBuffer = (_buf: Buffer, _opts: any, cb: (err: any) => void) => {
    cb(new Error('corrupted'));
  };
  vi.doMock('yauzl', () => ({
    default: { fromBuffer: failingFromBuffer },
  }));

  // Import only after the mock is registered so the module under test picks it up.
  const { extractFiles: extractWithBrokenZip } = await import('./parser-utils');
  const corruptedInput = Buffer.from('corrupted');

  // Any rejection is accepted here; the error message itself is not asserted.
  await expect(extractWithBrokenZip(corruptedInput, () => true)).rejects.toThrow();
});
it('should read entries via yauzl.fromBuffer and filter matches', async () => { it('should read entries via yauzl.fromBuffer and filter matches', async () => {
// Arrange: build a fake zipfile object with two file entries and one directory // Arrange: build a fake zipfile object with two file entries and one directory
const entryHandlers: Record<string, (cb: () => void) => void> = {}; const entryHandlers: Record<string, (cb: () => void) => void> = {};
@@ -151,5 +165,146 @@ describe('parser-utils', () => {
const files = await mockedExtractFiles('/tmp/file.zip', (name) => name === 'keep.txt'); const files = await mockedExtractFiles('/tmp/file.zip', (name) => name === 'keep.txt');
expect(files).toEqual([{ path: 'keep.txt', content: 'A' }]); expect(files).toEqual([{ path: 'keep.txt', content: 'A' }]);
}); });
it('should handle openReadStream error', async () => {
  // Explicit listener signature instead of the unsafe global `Function` type.
  type Listener = (payload?: unknown) => void;

  // Minimal event registry standing in for the zipfile's emitter: handlers registered
  // through `on` are stored per event name and fired manually via `emit`.
  const listeners: Record<string, Listener[]> = { entry: [], end: [], error: [] };
  const emit = (name: string, payload?: unknown) =>
    (listeners[name] || []).forEach((fn) => fn(payload));

  const fakeZipfile = {
    // Every readEntry call announces one file entry on the next microtask.
    readEntry: vi.fn().mockImplementation(() => {
      queueMicrotask(() => emit('entry', { fileName: 'test.txt' }));
    }),
    // Opening the entry always fails; this is the condition under test.
    openReadStream: vi.fn((_entry: unknown, cb: (err: any, stream?: any) => void) => {
      cb(new Error('Failed to open stream'));
    }),
    on: vi.fn((evt: string, handler: Listener) => {
      listeners[evt] = listeners[evt] || [];
      listeners[evt].push(handler);
    }),
    close: vi.fn(),
  } as any;

  // yauzl.fromBuffer succeeds and hands back the fake zipfile.
  vi.doMock('yauzl', () => ({
    default: {
      fromBuffer: (_buf: Buffer, _opts: any, cb: (err: any, zf?: any) => void) =>
        cb(null, fakeZipfile),
    },
  }));

  // NOTE(review): this relies on './parser-utils' being evaluated after vi.doMock;
  // presumably a module-reset hook exists outside this view — confirm.
  const { extractFiles: mockedExtractFiles } = await import('./parser-utils');

  await expect(mockedExtractFiles(Buffer.from('zip'), () => true)).rejects.toThrow(
    'Failed to open stream',
  );
});
it('should handle null readStream', async () => {
  // Explicit listener signature instead of the unsafe global `Function` type.
  type Listener = (payload?: unknown) => void;

  // Handlers registered through `on` are stored per event name and fired via `emit`.
  const listeners: Record<string, Listener[]> = { entry: [], end: [], error: [] };
  const emit = (name: string, payload?: unknown) =>
    (listeners[name] || []).forEach((fn) => fn(payload));

  const fakeZipfile = {
    // Every readEntry call announces one file entry on the next microtask.
    readEntry: vi.fn().mockImplementation(() => {
      queueMicrotask(() => emit('entry', { fileName: 'test.txt' }));
    }),
    // Reports success but supplies no stream: readStream is null.
    openReadStream: vi.fn((_entry: unknown, cb: (err: any, stream?: any) => void) => {
      cb(null, null);
    }),
    on: vi.fn((evt: string, handler: Listener) => {
      listeners[evt] = listeners[evt] || [];
      listeners[evt].push(handler);
    }),
    close: vi.fn(),
  } as any;

  // yauzl.fromBuffer succeeds and hands back the fake zipfile.
  vi.doMock('yauzl', () => ({
    default: {
      fromBuffer: (_buf: Buffer, _opts: any, cb: (err: any, zf?: any) => void) =>
        cb(null, fakeZipfile),
    },
  }));

  // NOTE(review): this relies on './parser-utils' being evaluated after vi.doMock;
  // presumably a module-reset hook exists outside this view — confirm.
  const { extractFiles: mockedExtractFiles } = await import('./parser-utils');

  await expect(mockedExtractFiles(Buffer.from('zip'), () => true)).rejects.toThrow(
    'Could not open read stream',
  );
});
it('should handle readStream error', async () => {
  // Explicit listener signature instead of the unsafe global `Function` type.
  type Listener = (payload?: unknown) => void;

  // Handlers registered on the fake zipfile are stored per event name and fired via `emit`.
  const listeners: Record<string, Listener[]> = { entry: [], end: [], error: [] };
  const emit = (name: string, payload?: unknown) =>
    (listeners[name] || []).forEach((fn) => fn(payload));

  const fakeZipfile = {
    // Every readEntry call announces one file entry on the next microtask.
    readEntry: vi.fn().mockImplementation(() => {
      queueMicrotask(() => emit('entry', { fileName: 'test.txt' }));
    }),
    // Opening succeeds, but the returned stream fails once an 'error' handler is attached.
    openReadStream: vi.fn((_entry: unknown, cb: (err: any, stream?: any) => void) => {
      const stream = {
        pipe: vi.fn().mockReturnThis(),
        // Stream handlers are not stored: the only one that matters is invoked directly,
        // so the previous write-only `streamListeners` registry was dropped.
        on: vi.fn((evt: string, handler: Listener) => {
          if (evt === 'error') {
            // Deliver the failure on the next microtask after the handler is registered.
            queueMicrotask(() => handler(new Error('Stream error')));
          }
        }),
      };
      cb(null, stream);
    }),
    on: vi.fn((evt: string, handler: Listener) => {
      listeners[evt] = listeners[evt] || [];
      listeners[evt].push(handler);
    }),
    close: vi.fn(),
  } as any;

  // yauzl.fromBuffer succeeds and hands back the fake zipfile.
  vi.doMock('yauzl', () => ({
    default: {
      fromBuffer: (_buf: Buffer, _opts: any, cb: (err: any, zf?: any) => void) =>
        cb(null, fakeZipfile),
    },
  }));

  // NOTE(review): this relies on './parser-utils' being evaluated after vi.doMock;
  // presumably a module-reset hook exists outside this view — confirm.
  const { extractFiles: mockedExtractFiles } = await import('./parser-utils');

  await expect(mockedExtractFiles(Buffer.from('zip'), () => true)).rejects.toThrow(
    'Stream error',
  );
});
it('should handle zipfile error', async () => {
  // Explicit listener signature instead of the unsafe global `Function` type.
  type Listener = (payload?: unknown) => void;

  // Handlers registered through `on` are stored per event name and fired via `emit`.
  const listeners: Record<string, Listener[]> = { entry: [], end: [], error: [] };
  const emit = (name: string, payload?: unknown) =>
    (listeners[name] || []).forEach((fn) => fn(payload));

  const fakeZipfile = {
    // Instead of producing an entry, readEntry raises an 'error' event on the next microtask.
    readEntry: vi.fn().mockImplementation(() => {
      queueMicrotask(() => emit('error', new Error('Zipfile error')));
    }),
    on: vi.fn((evt: string, handler: Listener) => {
      listeners[evt] = listeners[evt] || [];
      listeners[evt].push(handler);
    }),
    close: vi.fn(),
  } as any;

  // yauzl.fromBuffer succeeds and hands back the fake zipfile.
  vi.doMock('yauzl', () => ({
    default: {
      fromBuffer: (_buf: Buffer, _opts: any, cb: (err: any, zf?: any) => void) =>
        cb(null, fakeZipfile),
    },
  }));

  // NOTE(review): this relies on './parser-utils' being evaluated after vi.doMock;
  // presumably a module-reset hook exists outside this view — confirm.
  const { extractFiles: mockedExtractFiles } = await import('./parser-utils');

  await expect(mockedExtractFiles(Buffer.from('zip'), () => true)).rejects.toThrow(
    'Zipfile error',
  );
});
}); });
}); });

View File

@@ -3,6 +3,14 @@ import { defineConfig } from 'vitest/config';
export default defineConfig({ export default defineConfig({
test: { test: {
coverage: { coverage: {
exclude: [
'**/types.ts',
'**/types/**',
'**/*.d.ts',
'**/test/setup.ts',
'**/vitest.config.*',
'**/node_modules/**',
],
reporter: ['text', 'json', 'lcov', 'text-summary'], reporter: ['text', 'json', 'lcov', 'text-summary'],
}, },
environment: 'happy-dom', environment: 'happy-dom',