Skip to content

Instantly share code, notes, and snippets.

@grounzero
Last active January 7, 2025 17:12
Show Gist options
  • Save grounzero/26add45ded22d26bc4179d53399c62b6 to your computer and use it in GitHub Desktop.
Save grounzero/26add45ded22d26bc4179d53399c62b6 to your computer and use it in GitHub Desktop.
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
/*
* Usage:
* - Default: Combine files from the current directory into './current-directory.txt'
* node combine.js
*
* - Specify a custom directory:
* node combine.js src
* (Outputs to './src.txt')
*
* - Add additional file types (comma-separated):
* node combine.js src ./output.txt .json,.md
* (Includes .json and .md files)
*
* - Exclude additional directories (comma-separated directory names):
* node combine.js src ./output.txt .json,.md --exclude=coverage,temp
*
* - Specify encoding for reading and writing files:
* node combine.js src ./output.txt --encoding=utf16le
*
* - Filter files based on size (in bytes):
* node combine.js src ./output.txt --size=1024
* (Skips files larger than 1KB)
*
* - Filter files based on modification date:
* node combine.js src ./output.txt --date="2023-01-01"
* (Skips files modified before January 1, 2023)
*
* - Show help:
* node combine.js --help
*
* Supported default file types:
* Web/Frontend: .tsx, .ts, .jsx, .js, .html, .css
* C++/C: .cpp, .hpp, .h, .c
* Python: .py
* C#: .cs
* Java: .java
* Kotlin: .kt, .kts
* Swift: .swift
* Ruby: .rb
* PHP: .php
* Go: .go
* Rust: .rs
* Shell scripts: .sh, .bash
* SQL: .sql
*
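* Default excluded directories: node_modules, dist, build, bin, obj, debug, release, .git, .vs, .vscode, .idea, coverage, tmp, temp, TestResults, __pycache__, .pytest_cache, .cache, .parcel-cache, .eslintcache, logs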
*/
// Default list of supported file extensions
const defaultExtensions = [
'.tsx', '.ts', '.jsx', '.js', '.html', '.css', // Web/frontend
'.cpp', '.hpp', '.h', '.c', // C++/C
'.py', // Python
'.cs', // C#
'.java', // Java
'.kt', '.kts', // Kotlin
'.swift', // Swift
'.rb', // Ruby
'.php', // PHP
'.go', // Go
'.rs', // Rust
'.sh', '.bash', // Shell scripts
'.sql', // SQL scripts
];
// Directory names that are never entered while scanning, grouped by purpose.
// The groups are flattened in declaration order, so the resulting list keeps
// exactly this sequence.
const defaultExcludedDirectories = [
  ['node_modules', 'dist', 'build', 'bin', 'obj'], // Build output and metadata
  ['debug', 'release'], // Debug/Release configurations
  ['.git', '.vs', '.vscode'], // Version control and IDE metadata
  ['.idea', 'coverage', 'tmp', 'temp'], // Cache and temp directories
  ['TestResults'], // Visual Studio test results
  ['__pycache__', '.pytest_cache'], // Python test and cache directories
  ['.cache', '.parcel-cache', '.eslintcache'], // General cache directories
  ['logs'], // Logs directory
].flat();
// Name of this script file (ES modules have no __filename); the scanner uses it
// so the script does not combine itself.
const scriptFileName = path.basename(fileURLToPath(import.meta.url));
const args = process.argv.slice(2);

// Handle --help before anything else so it always works, even when it is
// passed next to an option that would otherwise fail validation.
if (args.includes('--help')) {
  console.log(`
Usage:
Default: Combine files from the current directory into './current-directory.txt'
node combine.js
Specify a custom directory:
node combine.js src
(Outputs to './src.txt')
Add additional file types (comma-separated):
node combine.js src ./output.txt .json,.md
(Includes .json and .md files)
Exclude additional directories or file patterns:
node combine.js src ./output.txt .json,.md --exclude=coverage,temp
Specify encoding for reading and writing files:
node combine.js src ./output.txt --encoding=utf16le
Filter files based on size (in bytes):
node combine.js src ./output.txt --size=1024
(Skips files larger than 1KB)
Filter files based on modification date:
node combine.js src ./output.txt --date="2023-01-01"
(Skips files modified before January 1, 2023)
Show help:
node combine.js --help
Supported default file types:
${defaultExtensions.join(', ')}
Default excluded directories:
${defaultExcludedDirectories.join(', ')}
`);
  process.exit(0);
}

// `--name=value` options may appear anywhere on the command line. Every other
// argument is positional, in the order: <directory> <output file> <extensions>.
// Separating the two keeps an option from being mistaken for a path or an
// extension list when fewer than three positionals are given.
const positionalArgs = args.filter((arg) => !arg.startsWith('--'));

/**
 * Looks up a `--<name>=<value>` option.
 * @param {string} name - Option name without the leading dashes.
 * @returns {string|null} Everything after the first '=' (so the value itself
 *   may contain '='), or null when the option was not passed.
 */
const getOptionValue = (name) => {
  const prefix = `--${name}=`;
  const match = args.find((arg) => arg.startsWith(prefix));
  return match === undefined ? null : match.slice(prefix.length);
};

/**
 * Splits a comma-separated list into trimmed, non-empty entries.
 * Empty entries are dropped because an empty extension would match every file
 * name via `endsWith('')`.
 * @param {string|null|undefined} value - Raw list, e.g. ".json,.md".
 * @returns {string[]} Parsed entries; empty array when nothing was given.
 */
const parseList = (value) =>
  value
    ? value
        .split(',')
        .map((entry) => entry.trim())
        .filter((entry) => entry !== '')
    : [];

const directoryPath = positionalArgs[0] || process.cwd(); // Default: current directory
const outputFilePath = positionalArgs[1] || `./${path.basename(path.resolve(directoryPath))}.txt`; // Default: source folder name + ".txt"

// Additional file types (third positional argument)
const additionalExtensions = parseList(positionalArgs[2]);
const supportedExtensions = [...defaultExtensions, ...additionalExtensions];

// Additional exclusions from --exclude
const additionalExcludedDirectories = parseList(getOptionValue('exclude'));
const excludedDirectories = [...defaultExcludedDirectories, ...additionalExcludedDirectories];

// Encoding used for both reading and writing; an explicitly empty value is
// kept so that it is rejected by the validation below instead of ignored.
const encoding = getOptionValue('encoding') ?? 'utf8';

// Size filter in bytes: null when not requested, NaN when unparsable.
const sizeValue = getOptionValue('size');
const sizeFilter = sizeValue === null ? null : Number.parseInt(sizeValue, 10);

// Modification date filter: null when not requested.
const dateValue = getOptionValue('date');
const dateFilter = dateValue === null ? null : new Date(dateValue);

// Input validation. The checks compare against null explicitly: NaN and 0 are
// both falsy, so a truthiness test would let an unparsable size slip through.
if (sizeFilter !== null && (Number.isNaN(sizeFilter) || sizeFilter < 0)) {
  console.error('Invalid size filter. Please provide a non-negative integer.');
  process.exit(1);
}
if (dateFilter !== null && Number.isNaN(dateFilter.getTime())) {
  console.error('Invalid date filter. Please provide a valid date in the format "YYYY-MM-DD".');
  process.exit(1);
}
if (!Buffer.isEncoding(encoding)) {
  console.error(`Unsupported encoding: ${encoding}. Please provide a valid encoding.`);
  process.exit(1);
}
/**
 * Recursively collects the files under `dir` that should be combined.
 *
 * Directories whose name is listed in `excludedDirectories` are not entered.
 * A file is a candidate when its name ends with one of `supportedExtensions`
 * and differs from this script's own file name. A candidate is then left out
 * when it is the output file itself, is empty, is larger than `sizeFilter`
 * bytes, or was modified before `dateFilter`. Every decision is logged using a
 * path relative to the current working directory.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} [fileList] - Accumulator for the paths of included files.
 * @param {{files: string[], dirs: string[], emptyFiles: string[], permissionDenied: string[]}} [skipped]
 *   Accumulator for the relative paths of everything that was left out.
 * @param {{count: number}} [dirCount] - Counter of directories read successfully.
 * @returns {{fileList: string[], skipped: object, dirCount: {count: number}}}
 *   The same accumulator objects that were passed in (or created by default).
 */
function readFilesRecursively(dir, fileList = [], skipped = { files: [], dirs: [], emptyFiles: [], permissionDenied: [] }, dirCount = { count: 0 }) {
  const toRelative = (target) => path.relative(process.cwd(), target);
  // EPERM is reported instead of EACCES for some denied operations.
  const isPermissionError = (error) => error.code === 'EACCES' || error.code === 'EPERM';
  // Compared against each candidate so a previous run's output is never
  // combined into the new output.
  const resolvedOutputPath = path.resolve(outputFilePath);

  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch (error) {
    const relativeDir = toRelative(dir);
    if (isPermissionError(error)) {
      console.log(`Permission denied accessing directory: ${relativeDir}`);
      skipped.permissionDenied.push(relativeDir);
    } else {
      console.error(`Error accessing directory: ${relativeDir}`, error.message);
      skipped.dirs.push(relativeDir);
    }
    return { fileList, skipped, dirCount };
  }

  dirCount.count += 1; // Only directories that could actually be read are counted

  for (const entry of entries) {
    const entryPath = path.join(dir, entry.name);
    const relativePath = toRelative(entryPath);

    if (entry.isDirectory()) {
      if (excludedDirectories.includes(entry.name)) {
        console.log(`Skipped directory: ${relativePath}`);
        skipped.dirs.push(relativePath);
      } else {
        readFilesRecursively(entryPath, fileList, skipped, dirCount);
      }
      continue;
    }

    const isCandidate =
      entry.name !== scriptFileName && // Exclude this script file
      supportedExtensions.some((ext) => entry.name.endsWith(ext));
    if (!isCandidate) {
      console.log(`Skipped file: ${relativePath}`);
      skipped.files.push(relativePath);
      continue;
    }

    if (path.resolve(entryPath) === resolvedOutputPath) {
      console.log(`Skipped file (output file): ${relativePath}`);
      skipped.files.push(relativePath);
      continue;
    }

    try {
      const stats = fs.statSync(entryPath);
      if (stats.size === 0) {
        console.log(`Empty file: ${relativePath}`);
        skipped.emptyFiles.push(relativePath);
      } else if (Number.isInteger(sizeFilter) && stats.size > sizeFilter) {
        // Number.isInteger rejects null/NaN but accepts 0, so `--size=0` is honoured.
        console.log(`Skipped file (size > ${sizeFilter} bytes): ${relativePath}`);
        skipped.files.push(relativePath);
      } else if (dateFilter && stats.mtime < dateFilter) {
        console.log(`Skipped file (modified before ${dateFilter.toISOString()}): ${relativePath}`);
        skipped.files.push(relativePath);
      } else {
        fileList.push(entryPath);
        console.log(`Included file: ${relativePath}`);
      }
    } catch (error) {
      if (isPermissionError(error)) {
        console.log(`Permission denied accessing file: ${relativePath}`);
        skipped.permissionDenied.push(relativePath);
      } else {
        console.error(`Error accessing file: ${entryPath}`, error.message);
      }
    }
  }

  return { fileList, skipped, dirCount };
}
// Scan the source tree, then merge every readable file into one output file.
const { fileList, skipped, dirCount } = readFilesRecursively(directoryPath);

// One section per file that was read successfully: a banner carrying the path
// (relative to the working directory) followed by the file's content. A file
// that fails to read is reported and left out, so it is not counted below.
const sections = [];
for (const file of fileList) {
  try {
    const relativePath = path.relative(process.cwd(), file);
    const content = fs.readFileSync(file, encoding);
    sections.push(`\n/* FILE: ${relativePath} */\n${content}\n`);
  } catch (error) {
    console.error(`Error reading file: ${file}`, error.message);
  }
}
const combinedContent = sections.join('');

try {
  fs.writeFileSync(outputFilePath, combinedContent, encoding);
  // Report the number of files actually written, not merely discovered.
  console.log(`Combined ${sections.length} files from '${directoryPath}' into '${outputFilePath}' (${encoding} encoding).`);
  console.log(`Directories recursed: ${dirCount.count}`);
  console.log(`Skipped directories: ${skipped.dirs.length}`);
  console.log(`Skipped files: ${skipped.files.length}`);
  console.log(`Empty files (${skipped.emptyFiles.length}):`);
  skipped.emptyFiles.forEach((file) => console.log(` - ${file}`));
  console.log(`Permission denied (${skipped.permissionDenied.length}):`);
  skipped.permissionDenied.forEach((item) => console.log(` - ${item}`));
} catch (error) {
  console.error(`Error writing to output file: ${outputFilePath}`, error.message);
  // Signal the failure to the calling shell instead of exiting with status 0.
  process.exitCode = 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment