Last active
January 7, 2025 17:12
-
-
Save grounzero/26add45ded22d26bc4179d53399c62b6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
/* | |
* Usage: | |
* - Default: Combine files from the current directory into './current-directory.txt' | |
* node combine.js | |
* | |
* - Specify a custom directory: | |
* node combine.js src | |
* (Outputs to './src.txt') | |
* | |
* - Add additional file types (comma-separated): | |
* node combine.js src ./output.txt .json,.md | |
* (Includes .json and .md files) | |
* | |
* - Exclude additional directories (comma-separated, matched by exact directory name):
* node combine.js src ./output.txt .json,.md --exclude=coverage,temp | |
* | |
* - Specify encoding for reading and writing files: | |
* node combine.js src ./output.txt --encoding=utf16le | |
* | |
* - Filter files based on size (in bytes): | |
* node combine.js src ./output.txt --size=1024 | |
* (Skips files larger than 1KB) | |
* | |
* - Filter files based on modification date: | |
* node combine.js src ./output.txt --date="2023-01-01" | |
* (Skips files modified before January 1, 2023) | |
* | |
* - Show help: | |
* node combine.js --help | |
* | |
* Supported default file types: | |
* Web/Frontend: .tsx, .ts, .jsx, .js, .html, .css | |
* C++/C: .cpp, .hpp, .h, .c | |
* Python: .py | |
* C#: .cs | |
* Java: .java | |
* Kotlin: .kt, .kts | |
* Swift: .swift | |
* Ruby: .rb | |
* PHP: .php | |
* Go: .go | |
* Rust: .rs | |
* Shell scripts: .sh, .bash | |
* SQL: .sql | |
* | |
* Default excluded directories: node_modules, dist, build, bin, obj, debug, release, .git, .vs, .vscode,
* .idea, coverage, tmp, temp, TestResults, __pycache__, .pytest_cache, .cache, .parcel-cache, .eslintcache, logs
*/
/**
 * File extensions that are combined by default.
 *
 * A file qualifies when its name ends with one of these strings, so every
 * entry includes the leading dot. Extensions passed on the command line are
 * appended to this list; they never replace it.
 */
const defaultExtensions = [
  '.tsx', '.ts', '.jsx', '.js', '.html', '.css', // Web/frontend
  '.cpp', '.hpp', '.h', '.c', // C++/C
  '.py', // Python
  '.cs', // C#
  '.java', // Java
  '.kt', '.kts', // Kotlin
  '.swift', // Swift
  '.rb', // Ruby
  '.php', // PHP
  '.go', // Go
  '.rs', // Rust
  '.sh', '.bash', // Shell scripts
  '.sql', // SQL scripts
];
/**
 * Directory names that are never descended into.
 *
 * Matching is by exact directory name at any depth of the tree, not by path.
 * Names supplied through `--exclude=` are appended to this list.
 */
const defaultExcludedDirectories = [
  'node_modules', 'dist', 'build', 'bin', 'obj', // Build output and metadata
  'debug', 'release', // Debug/Release configurations
  '.git', '.vs', '.vscode', // Metadata for version control and IDEs
  '.idea', 'coverage', 'tmp', 'temp', // Cache and temp directories
  'TestResults', // Visual Studio test results
  '__pycache__', '.pytest_cache', // Python test and cache directories
  '.cache', '.parcel-cache', '.eslintcache', // General cache directories
  'logs', // Logs directory
];
// Name of this script file (replacement for __filename in ES modules); the
// directory walk compares against it to keep the script out of its own output.
const scriptFileName = path.basename(fileURLToPath(import.meta.url));

// Everything typed after `node combine.js`.
const args = process.argv.slice(2);

// Positional arguments are the ones that are not `--flags`. Separating them
// stops a flag such as `--size=1024` from being mistaken for the source
// directory, the output path or the extra-extension list.
const positionalArgs = args.filter((arg) => !arg.startsWith('--'));

/**
 * Extracts the value part of a `--name=value` argument.
 *
 * Slices after the first `=` instead of splitting on `=`, so values that
 * themselves contain `=` are kept whole.
 *
 * @param {string|undefined} flagArg - The full argument, or undefined when the flag was not given.
 * @returns {string|null} The text after the first `=`, or null when the flag is absent.
 */
function getFlagValue(flagArg) {
  return flagArg === undefined ? null : flagArg.slice(flagArg.indexOf('=') + 1);
}

/**
 * Splits a comma-separated option into trimmed entries.
 *
 * Empty entries are dropped: an empty extension would match every file name
 * (`name.endsWith('')` is always true).
 *
 * @param {string|null|undefined} value - Raw option text.
 * @returns {string[]} The non-empty, trimmed entries (empty array when no value was given).
 */
function splitCommaList(value) {
  if (!value) {
    return [];
  }
  return value
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry !== '');
}

const directoryPath = positionalArgs[0] || process.cwd(); // Default: current directory
const outputFilePath = positionalArgs[1] || `./${path.basename(path.resolve(directoryPath))}.txt`; // Default: source folder name + ".txt"

// Extra file types (third positional argument) are added to the defaults
const additionalExtensions = splitCommaList(positionalArgs[2]);
const supportedExtensions = [...defaultExtensions, ...additionalExtensions];

// Extra directory names from --exclude are added to the defaults
const excludeArg = args.find((arg) => arg.startsWith('--exclude='));
const additionalExcludedDirectories = splitCommaList(getFlagValue(excludeArg));
const excludedDirectories = [...defaultExcludedDirectories, ...additionalExcludedDirectories];

// Encoding used for both reading the sources and writing the output
const encodingArg = args.find((arg) => arg.startsWith('--encoding='));
const encodingValue = getFlagValue(encodingArg);
const encoding = encodingValue === null ? 'utf8' : encodingValue;

// Maximum file size in bytes (null = no size filter)
const sizeArg = args.find((arg) => arg.startsWith('--size='));
const sizeValue = getFlagValue(sizeArg);
// Digits only: rejects '', 'abc', '-5' and '12abc', which parseInt alone would let through or turn into NaN
const isSizeValid = sizeValue === null || /^\d+$/.test(sizeValue);
const sizeFilter = sizeValue === null ? null : Number.parseInt(sizeValue, 10);

// Oldest accepted modification date (null = no date filter)
const dateArg = args.find((arg) => arg.startsWith('--date='));
const dateValue = getFlagValue(dateArg);
const dateFilter = dateValue === null ? null : new Date(dateValue);

// Input validation. Skipped when --help is requested so that a bad option
// cannot prevent the usage text from being shown.
if (!args.includes('--help')) {
  if (!isSizeValid) {
    console.error('Invalid size filter. Please provide a non-negative integer.');
    process.exit(1);
  }
  if (dateFilter !== null && Number.isNaN(dateFilter.getTime())) {
    console.error('Invalid date filter. Please provide a valid date in the format "YYYY-MM-DD".');
    process.exit(1);
  }
  if (!Buffer.isEncoding(encoding)) {
    console.error(`Unsupported encoding: ${encoding}. Please provide a valid encoding.`);
    process.exit(1);
  }
}
// Print the usage text and exit successfully. The default lists are
// interpolated from the arrays, so the text always reflects the real defaults.
if (args.includes('--help')) {
  console.log(`
Usage:
  Default: Combine files from the current directory into './<current-directory-name>.txt'
    node combine.js

  Specify a custom directory:
    node combine.js src
    (Outputs to './src.txt')

  Add additional file types (comma-separated):
    node combine.js src ./output.txt .json,.md
    (Includes .json and .md files)

  Exclude additional directories (comma-separated, matched by exact directory name):
    node combine.js src ./output.txt .json,.md --exclude=coverage,temp

  Specify encoding for reading and writing files:
    node combine.js src ./output.txt --encoding=utf16le

  Filter files based on size (in bytes):
    node combine.js src ./output.txt --size=1024
    (Skips files larger than 1KB)

  Filter files based on modification date:
    node combine.js src ./output.txt --date="2023-01-01"
    (Skips files modified before January 1, 2023)

  Show help:
    node combine.js --help

Supported default file types:
  ${defaultExtensions.join(', ')}

Default excluded directories:
  ${defaultExcludedDirectories.join(', ')}
`);
  process.exit(0);
}
/**
 * Walks `dir` depth-first and collects the files that should be combined.
 *
 * A file is collected when its name ends with one of `supportedExtensions`,
 * it is not this script or the output file, it is a non-empty regular file,
 * and it passes the optional `sizeFilter` / `dateFilter`. Everything else is
 * logged and recorded in `skipped`. Directories named in `excludedDirectories`
 * are not entered.
 *
 * The accumulator parameters are shared with the recursive calls and mutated
 * in place; callers normally pass only `dir`.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} [fileList] - Accumulator of paths of files to combine.
 * @param {{files: string[], dirs: string[], emptyFiles: string[], permissionDenied: string[]}} [skipped]
 *   Accumulator of skipped items, as paths relative to the current working directory.
 * @param {{count: number}} [dirCount] - Counter of directories that were successfully listed.
 * @returns {{fileList: string[], skipped: object, dirCount: {count: number}}} The same accumulator objects.
 */
function readFilesRecursively(dir, fileList = [], skipped = { files: [], dirs: [], emptyFiles: [], permissionDenied: [] }, dirCount = { count: 0 }) {
  const toRelative = (target) => path.relative(process.cwd(), target);
  // EPERM is reported instead of EACCES on some platforms (notably Windows).
  const isPermissionError = (error) => error.code === 'EACCES' || error.code === 'EPERM';
  // The output of an earlier run must not be fed back into the next one.
  const outputFileAbsolute = path.resolve(outputFilePath);

  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch (error) {
    const relativeDir = toRelative(dir);
    if (isPermissionError(error)) {
      console.log(`Permission denied accessing directory: ${relativeDir}`);
      skipped.permissionDenied.push(relativeDir);
    } else {
      console.error(`Error accessing directory: ${relativeDir}`, error.message);
      skipped.dirs.push(relativeDir);
    }
    return { fileList, skipped, dirCount };
  }

  dirCount.count += 1; // Only directories that could actually be listed are counted

  for (const entry of entries) {
    const entryPath = path.join(dir, entry.name);
    const relativePath = toRelative(entryPath);

    if (entry.isDirectory()) {
      if (excludedDirectories.includes(entry.name)) {
        console.log(`Skipped directory: ${relativePath}`);
        skipped.dirs.push(relativePath);
      } else {
        readFilesRecursively(entryPath, fileList, skipped, dirCount);
      }
      continue;
    }

    const isCandidate =
      entry.name !== scriptFileName && // Exclude this script file
      path.resolve(entryPath) !== outputFileAbsolute && // Exclude the output file
      supportedExtensions.some((ext) => entry.name.endsWith(ext));
    if (!isCandidate) {
      console.log(`Skipped file: ${relativePath}`);
      skipped.files.push(relativePath);
      continue;
    }

    let stats;
    try {
      stats = fs.statSync(entryPath);
    } catch (error) {
      if (isPermissionError(error)) {
        console.log(`Permission denied accessing file: ${relativePath}`);
        skipped.permissionDenied.push(relativePath);
      } else {
        console.error(`Error accessing file: ${entryPath}`, error.message);
        // Record it so the summary accounts for every file that was left out
        skipped.files.push(relativePath);
      }
      continue;
    }

    if (!stats.isFile()) {
      // e.g. a FIFO or a symlink to a directory: reading it would block or fail
      console.log(`Skipped file (not a regular file): ${relativePath}`);
      skipped.files.push(relativePath);
    } else if (stats.size === 0) {
      console.log(`Empty file: ${relativePath}`);
      skipped.emptyFiles.push(relativePath);
    } else if (typeof sizeFilter === 'number' && stats.size > sizeFilter) {
      // typeof check instead of truthiness so that a limit of 0 is honoured
      console.log(`Skipped file (size > ${sizeFilter} bytes): ${relativePath}`);
      skipped.files.push(relativePath);
    } else if (dateFilter && stats.mtime < dateFilter) {
      console.log(`Skipped file (modified before ${dateFilter.toISOString()}): ${relativePath}`);
      skipped.files.push(relativePath);
    } else {
      fileList.push(entryPath);
      console.log(`Included file: ${relativePath}`);
    }
  }

  return { fileList, skipped, dirCount };
}
// Fail fast when the source is missing or is not a directory; otherwise the
// walk would find nothing and an empty output file would be written.
try {
  if (!fs.statSync(directoryPath).isDirectory()) {
    console.error(`Source path is not a directory: ${directoryPath}`);
    process.exit(1);
  }
} catch (error) {
  console.error(`Cannot access source directory: ${directoryPath}`, error.message);
  process.exit(1);
}

const { fileList, skipped, dirCount } = readFilesRecursively(directoryPath);

// One section per file that could actually be read, each introduced by a
// `/* FILE: <relative path> */` banner.
const sections = [];
for (const file of fileList) {
  try {
    const relativePath = path.relative(process.cwd(), file); // Get relative path
    const content = fs.readFileSync(file, encoding);
    sections.push(`\n/* FILE: ${relativePath} */\n${content}\n`);
  } catch (error) {
    console.error(`Error reading file: ${file}`, error.message);
  }
}
const combinedContent = sections.join('');

try {
  fs.writeFileSync(outputFilePath, combinedContent, encoding);
  // Report sections.length rather than fileList.length: files that failed to read are not in the output
  console.log(`Combined ${sections.length} files from '${directoryPath}' into '${outputFilePath}' (${encoding} encoding).`);
  console.log(`Directories recursed: ${dirCount.count}`);
  console.log(`Skipped directories: ${skipped.dirs.length}`);
  console.log(`Skipped files: ${skipped.files.length}`);
  console.log(`Empty files (${skipped.emptyFiles.length}):`);
  skipped.emptyFiles.forEach((file) => console.log(` - ${file}`));
  console.log(`Permission denied (${skipped.permissionDenied.length}):`);
  skipped.permissionDenied.forEach((item) => console.log(` - ${item}`));
} catch (error) {
  console.error(`Error writing to output file: ${outputFilePath}`, error.message);
  // Signal the failure to the calling shell instead of exiting with status 0
  process.exitCode = 1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment