✅ test: add tests for file-loaders packages (#9678)

add tests
2025-12-20 01:12:52 +08:00 · 2025-10-12 13:43:08 +02:00
parent d0e2acac13
commit 3cf7df5748
9 changed files with 226 additions and 1 deletions
--- a/packages/file-loaders/src/loaders/docx/index.test.ts
+++ b/packages/file-loaders/src/loaders/docx/index.test.ts
@@ -4,7 +4,6 @@ import { beforeEach, describe, expect, it } from 'vitest';
 import type { FileLoaderInterface } from '../../types';
 import { DocxLoader } from './index';
 // Make sure a test.docx file has been placed in the fixtures directory.
 /** Resolves a fixture file name to a path under this test file's ./fixtures directory. */
 const fixturePath = (filename: string) => {
   const relativeLocation = `./fixtures/${filename}`;
   return path.join(__dirname, relativeLocation);
 };
 let loader: FileLoaderInterface;
--- a/packages/file-loaders/src/loaders/excel/snapshots/index.test.ts.snap
+++ b/packages/file-loaders/src/loaders/excel/snapshots/index.test.ts.snap
@@ -22,6 +22,36 @@ exports[`ExcelLoader > should aggregate content correctly (joining sheets) > agg
 </sheet>"
 `;
 exports[`ExcelLoader > should handle Excel file with only headers > only_header_pages 1`] = `
 [
  {
    "charCount": 124,
    "lineCount": 3,
    "metadata": {
      "sheetName": "表1",
    },
    "pageContent": "| 表格 1 | __EMPTY | __EMPTY_1 | __EMPTY_2 | __EMPTY_3 |
 | --- | --- | --- | --- | --- |
 | header1 | header2 | header3 |  |  |",
  },
  {
    "charCount": 231,
    "lineCount": 8,
    "metadata": {
      "sheetName": "表2 - 表格 2",
    },
    "pageContent": "| 表格 2 | __EMPTY | __EMPTY_1 | __EMPTY_2 | __EMPTY_3 |
 | --- | --- | --- | --- | --- |
 |  | 类别 A | 类别 B |  |  |
 | 项目 1 | 5 | 7 |  |  |
 | 项目 2 | 10 | 8 |  |  |
 | 项目 3 | 9 | 15 |  |  |
 | 项目 4 | 7 | 12 |  |  |
 | 项目 5 | 16 | 21 |  |  |",
  },
 ]
 `;
 exports[`ExcelLoader > should load pages correctly from an Excel file (one page per sheet) 1`] = `
 [
  {
--- a/packages/file-loaders/src/loaders/excel/fixtures/only-header.xlsx
+++ b/packages/file-loaders/src/loaders/excel/fixtures/only-header.xlsx
--- a/packages/file-loaders/src/loaders/excel/index.test.ts
+++ b/packages/file-loaders/src/loaders/excel/index.test.ts
@@ -44,4 +44,12 @@ describe('ExcelLoader', () => {
    expect(pages[0].pageContent).toBe('');
    expect(pages[0].metadata.error).toContain('Failed to load Excel file');
  });
  it('should handle Excel file with only headers', async () => {
    // Load the only-header.xlsx fixture through the loader under test.
    const loadedPages = await loader.loadPages(fixturePath('only-header.xlsx'));

    expect(loadedPages.length).toBeGreaterThan(0);
    // The first page should contain the header content.
    expect(loadedPages[0].pageContent).toBeTruthy();
    // Pin the full page structure against the stored snapshot.
    expect(loadedPages).toMatchSnapshot('only_header_pages');
  });
 });
--- a/packages/file-loaders/src/loaders/pptx/fixtures/corrupted-slides.pptx
+++ b/packages/file-loaders/src/loaders/pptx/fixtures/corrupted-slides.pptx
--- a/packages/file-loaders/src/loaders/pptx/fixtures/empty-slides.pptx
+++ b/packages/file-loaders/src/loaders/pptx/fixtures/empty-slides.pptx
--- a/packages/file-loaders/src/loaders/pptx/index.test.ts
+++ b/packages/file-loaders/src/loaders/pptx/index.test.ts
@@ -44,4 +44,29 @@ describe('PptxLoader', () => {
    expect(pages[0].pageContent).toBe('');
    expect(pages[0].metadata.error).toContain('Failed to load or process PPTX file:'); // Update error message check
  });
  it('should handle corrupted slide XML', async () => {
    // Load the corrupted-slides.pptx fixture through the loader under test.
    const result = await loader.loadPages(fixturePath('corrupted-slides.pptx'));

    // Exactly one page is expected: empty content plus an error in its metadata.
    expect(result).toHaveLength(1);
    const [errorPage] = result;
    expect(errorPage.pageContent).toBe('');
    expect(errorPage.metadata.error).toContain('All slides failed to parse correctly');
  });
  it('should handle aggregateContent with all error pages', async () => {
    // Pages loaded from the corrupted-slides.pptx fixture are fed straight into aggregateContent.
    const errorPages = await loader.loadPages(fixturePath('corrupted-slides.pptx'));
    const aggregated = await loader.aggregateContent(errorPages);

    // When every page is an error page, the aggregated content is an empty string.
    expect(aggregated).toBe('');
  });
  it('should handle empty PPTX file with no slides', async () => {
    // Error text expected in the metadata of the single returned page.
    const expectedMessage =
      'No slides found. The PPTX file might be empty, corrupted, or does not contain standard slide XMLs.';

    const result = await loader.loadPages(fixturePath('empty-slides.pptx'));

    expect(result).toHaveLength(1);
    expect(result[0].pageContent).toBe('');
    expect(result[0].metadata.error).toContain(expectedMessage);
  });
 });
--- a/packages/file-loaders/src/utils/parser-utils.test.ts
+++ b/packages/file-loaders/src/utils/parser-utils.test.ts
@@ -31,6 +31,20 @@ describe('parser-utils', () => {
      );
    });
    it('should handle corrupted file error with Buffer input', async () => {
      // Stub for yauzl.fromBuffer that always calls back with an error.
      const rejectingFromBuffer = (_buf: Buffer, _opts: any, cb: (err: any) => void) => {
        cb(new Error('corrupted'));
      };
      vi.doMock('yauzl', () => ({ default: { fromBuffer: rejectingFromBuffer } }));

      // The module is imported only after the mock has been registered.
      const { extractFiles: mockedExtractFiles } = await import('./parser-utils');
      const extraction = mockedExtractFiles(Buffer.from('corrupted'), () => true);

      // NOTE(review): the bare toThrow() accepts any rejection, so it does not prove the
      // stubbed 'corrupted' error is the one that surfaced - consider pinning the message.
      await expect(extraction).rejects.toThrow();
    });
    it('should read entries via yauzl.fromBuffer and filter matches', async () => {
      // Arrange: build a fake zipfile object with two file entries and one directory
      const entryHandlers: Record<string, (cb: () => void) => void> = {};
@@ -151,5 +165,146 @@ describe('parser-utils', () => {
      const files = await mockedExtractFiles('/tmp/file.zip', (name) => name === 'keep.txt');
      expect(files).toEqual([{ path: 'keep.txt', content: 'A' }]);
    });
    it('should handle openReadStream error', async () => {
      // Signature of the handlers registered through fakeZipfile.on.
      type Listener = (payload?: unknown) => void;
      // Registered handlers, keyed by event name.
      const listeners: Record<string, Listener[]> = { entry: [], end: [], error: [] };
      const emit = (name: string, payload?: unknown) =>
        (listeners[name] || []).forEach((fn) => fn(payload));

      // Zipfile stand-in: readEntry emits one 'entry' event (test.txt) on a microtask,
      // and openReadStream calls back with an error instead of a stream.
      const fakeZipfile = {
        readEntry: vi.fn().mockImplementation(() => {
          queueMicrotask(() => emit('entry', { fileName: 'test.txt' }));
        }),
        openReadStream: vi.fn((_entry: any, cb: (err: any, stream?: any) => void) => {
          cb(new Error('Failed to open stream'));
        }),
        on: vi.fn((evt: string, handler: Listener) => {
          listeners[evt] = listeners[evt] || [];
          listeners[evt].push(handler);
        }),
        close: vi.fn(),
      } as any;

      // yauzl.fromBuffer hands the fake zipfile to the caller without an error.
      vi.doMock('yauzl', () => ({
        default: {
          fromBuffer: (_buf: Buffer, _opts: any, cb: (err: any, zf?: any) => void) =>
            cb(null, fakeZipfile),
        },
      }));

      // NOTE(review): assumes the module registry is reset between tests so this import
      // evaluates parser-utils against the mock above - confirm a reset hook exists.
      const { extractFiles: mockedExtractFiles } = await import('./parser-utils');

      // The openReadStream error is expected to surface as the rejection.
      await expect(mockedExtractFiles(Buffer.from('zip'), () => true)).rejects.toThrow(
        'Failed to open stream',
      );
    });
    it('should handle null readStream', async () => {
      // Signature of the handlers registered through fakeZipfile.on.
      type Listener = (payload?: unknown) => void;
      // Registered handlers, keyed by event name.
      const listeners: Record<string, Listener[]> = { entry: [], end: [], error: [] };
      const emit = (name: string, payload?: unknown) =>
        (listeners[name] || []).forEach((fn) => fn(payload));

      // Zipfile stand-in: readEntry emits one 'entry' event (test.txt) on a microtask,
      // and openReadStream calls back with neither an error nor a stream.
      const fakeZipfile = {
        readEntry: vi.fn().mockImplementation(() => {
          queueMicrotask(() => emit('entry', { fileName: 'test.txt' }));
        }),
        openReadStream: vi.fn((_entry: any, cb: (err: any, stream?: any) => void) => {
          cb(null, null); // readStream is null
        }),
        on: vi.fn((evt: string, handler: Listener) => {
          listeners[evt] = listeners[evt] || [];
          listeners[evt].push(handler);
        }),
        close: vi.fn(),
      } as any;

      // yauzl.fromBuffer hands the fake zipfile to the caller without an error.
      vi.doMock('yauzl', () => ({
        default: {
          fromBuffer: (_buf: Buffer, _opts: any, cb: (err: any, zf?: any) => void) =>
            cb(null, fakeZipfile),
        },
      }));

      // NOTE(review): assumes the module registry is reset between tests so this import
      // evaluates parser-utils against the mock above - confirm a reset hook exists.
      const { extractFiles: mockedExtractFiles } = await import('./parser-utils');

      // A missing stream is expected to be reported as a rejection with this message.
      await expect(mockedExtractFiles(Buffer.from('zip'), () => true)).rejects.toThrow(
        'Could not open read stream',
      );
    });
    it('should handle readStream error', async () => {
      // Signature of the handlers registered through the fake zipfile and fake stream.
      type Listener = (payload?: unknown) => void;
      // Zipfile-level handlers, keyed by event name.
      const listeners: Record<string, Listener[]> = { entry: [], end: [], error: [] };
      const emit = (name: string, payload?: unknown) =>
        (listeners[name] || []).forEach((fn) => fn(payload));

      // Zipfile stand-in: readEntry emits one 'entry' event (test.txt) on a microtask,
      // and openReadStream yields a stream that fails once an 'error' handler is attached.
      const fakeZipfile = {
        readEntry: vi.fn().mockImplementation(() => {
          queueMicrotask(() => emit('entry', { fileName: 'test.txt' }));
        }),
        openReadStream: vi.fn((_entry: any, cb: (err: any, stream?: any) => void) => {
          const stream = {
            pipe: vi.fn().mockReturnThis(),
            on: vi.fn((evt: string, handler: Listener) => {
              // Invoke the 'error' handler on a microtask right after it is registered.
              if (evt === 'error') {
                queueMicrotask(() => handler(new Error('Stream error')));
              }
            }),
          };
          cb(null, stream);
        }),
        on: vi.fn((evt: string, handler: Listener) => {
          listeners[evt] = listeners[evt] || [];
          listeners[evt].push(handler);
        }),
        close: vi.fn(),
      } as any;

      // yauzl.fromBuffer hands the fake zipfile to the caller without an error.
      vi.doMock('yauzl', () => ({
        default: {
          fromBuffer: (_buf: Buffer, _opts: any, cb: (err: any, zf?: any) => void) =>
            cb(null, fakeZipfile),
        },
      }));

      // NOTE(review): assumes the module registry is reset between tests so this import
      // evaluates parser-utils against the mock above - confirm a reset hook exists.
      const { extractFiles: mockedExtractFiles } = await import('./parser-utils');

      // The stream-level error is expected to surface as the rejection.
      await expect(mockedExtractFiles(Buffer.from('zip'), () => true)).rejects.toThrow(
        'Stream error',
      );
    });
    it('should handle zipfile error', async () => {
      // Signature of the handlers registered through fakeZipfile.on.
      type Listener = (payload?: unknown) => void;
      // Registered handlers, keyed by event name.
      const listeners: Record<string, Listener[]> = { entry: [], end: [], error: [] };
      const emit = (name: string, payload?: unknown) =>
        (listeners[name] || []).forEach((fn) => fn(payload));

      // Zipfile stand-in: readEntry emits an 'error' event on a microtask instead of an entry.
      const fakeZipfile = {
        readEntry: vi.fn().mockImplementation(() => {
          queueMicrotask(() => emit('error', new Error('Zipfile error')));
        }),
        on: vi.fn((evt: string, handler: Listener) => {
          listeners[evt] = listeners[evt] || [];
          listeners[evt].push(handler);
        }),
        close: vi.fn(),
      } as any;

      // yauzl.fromBuffer hands the fake zipfile to the caller without an error.
      vi.doMock('yauzl', () => ({
        default: {
          fromBuffer: (_buf: Buffer, _opts: any, cb: (err: any, zf?: any) => void) =>
            cb(null, fakeZipfile),
        },
      }));

      // NOTE(review): assumes the module registry is reset between tests so this import
      // evaluates parser-utils against the mock above - confirm a reset hook exists.
      const { extractFiles: mockedExtractFiles } = await import('./parser-utils');

      // The zipfile-level error is expected to surface as the rejection.
      await expect(mockedExtractFiles(Buffer.from('zip'), () => true)).rejects.toThrow(
        'Zipfile error',
      );
    });
  });
 });
--- a/packages/file-loaders/vitest.config.mts
+++ b/packages/file-loaders/vitest.config.mts
@@ -3,6 +3,14 @@ import { defineConfig } from 'vitest/config';
 export default defineConfig({
  test: {
    coverage: {
      exclude: [
        '**/types.ts',
        '**/types/**',
        '**/*.d.ts',
        '**/test/setup.ts',
        '**/vitest.config.*',
        '**/node_modules/**',
      ],
      reporter: ['text', 'json', 'lcov', 'text-summary'],
    },
    environment: 'happy-dom',