From 1c4a817634869e781e420ec3cd7b7ae43af4889d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Sep 2025 16:42:54 +0000 Subject: [PATCH 1/2] Initial plan From 126632561bf3f0f1421ab2196d3ed6c5f7e70a80 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Sep 2025 16:52:43 +0000 Subject: [PATCH 2/2] Implement command line version with Docker support and comprehensive documentation Co-authored-by: mopx <12272+mopx@users.noreply.github.com> --- .dockerignore | 41 +++++++ .gitignore | 4 + Dockerfile | 29 +++++ README.md | 219 +++++++++++++++++++++++++++------- bin/text-cleanup | 283 ++++++++++++++++++++++++++++++++++++++++++++ lib/text-cleaner.js | 105 ++++++++++++++++ package.json | 24 +++- 7 files changed, 657 insertions(+), 48 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100755 bin/text-cleanup create mode 100644 lib/text-cleaner.js diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..ae6acfc --- /dev/null +++ b/.dockerignore @@ -0,0 +1,41 @@ +# Docker ignore file for text-cleanup + +node_modules +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Development files +.git +.gitignore +.vercel +.env* + +# Web application files (not needed in CLI Docker image) +index.html +script.js +styles.css +vercel.json + +# Documentation and development files +README.md +.vscode +.idea + +# OS generated files +.DS_Store +.DS_Store? 
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Test files
+test/
+tests/
+*.test.js
+
+# Temporary files
+tmp/
+temp/
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 6357eec..30cc897 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,7 @@ Thumbs.db
 # Temporary files
 /tmp/
 .vercel
+
+# Test files for development
+test-files/
+README.md.backup
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c20bf32
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,29 @@
+FROM node:22-alpine
+
+# Set working directory
+WORKDIR /app
+
+# Copy package files
+COPY package*.json ./
+
+# Install production dependencies only (no lockfile exists, so `npm ci` is unavailable)
+RUN npm install --omit=dev && npm cache clean --force
+
+# Copy source code
+COPY lib/ ./lib/
+COPY bin/ ./bin/
+
+# Make CLI executable
+RUN chmod +x bin/text-cleanup
+
+# Create a non-root user that is a member of the matching group
+RUN addgroup -g 1001 -S textcleanup && \
+    adduser -S -G textcleanup -u 1001 textcleanup
+
+# Change to non-root user
+USER textcleanup
+
+# Absolute script path so `docker run -w <dir>` cannot break the entrypoint
+ENTRYPOINT ["node", "/app/bin/text-cleanup"]
+
+# No default CMD, so it can process stdin by default
\ No newline at end of file
diff --git a/README.md b/README.md
index 13df9c8..a753919 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
 # text-cleanup
 
-A simple, privacy-focused web application for cleaning text from unwanted formatting, special characters, and hidden artifacts commonly found when copying from AI tools, PDFs, websites, and documents.
+A simple, privacy-focused application for cleaning text from unwanted formatting, special characters, and hidden artifacts commonly found when copying from AI tools, PDFs, websites, and documents.
+
+Available as both a **web application** and a **command-line tool**.
## About @@ -12,22 +14,19 @@ This tool helps you quickly clean up messy text by removing: - 📄 Copy-paste formatting remnants - 🤖 AI-generated text artifacts -**Privacy-focused**: All processing happens locally in your browser - no data is sent to any servers. +**Privacy-focused**: All processing happens locally - no data is sent to any servers. -## Author +## Web Application -Created by **[@mopx](https://github.com/mopx)** +The web version is available at [your-deployment-url] and provides an easy-to-use interface for cleaning text in your browser. -- GitHub: [github.com/mopx](https://github.com/mopx) -- Repository: [github.com/mopx/text-cleanup](https://github.com/mopx/text-cleanup) +### Web Development -## Development - -### Prerequisites +#### Prerequisites - Node.js (for development server) - npm or Yarn package manager -### Setup +#### Setup 1. Clone the repository: ```bash git clone https://github.com/mopx/text-cleanup.git @@ -43,9 +42,9 @@ npm install yarn install ``` -### Running in Development Mode +#### Running in Development Mode -#### Primary Method: Using the dev script +##### Primary Method: Using the dev script ```bash # Using npm npm run dev @@ -56,7 +55,7 @@ yarn dev This starts a Node.js http-server on `http://localhost:3000` and automatically opens your browser. -#### Alternative: Using Vercel Dev (if not in Vercel environment) +##### Alternative: Using Vercel Dev (if not in Vercel environment) ```bash # Using npm npm run dev:vercel @@ -67,38 +66,123 @@ yarn dev:vercel Note: If you get a "recursive invocation" error, it means you're already in a Vercel environment. Use the primary method above instead. 
-#### Manual Server Options +## Command Line Tool + +### Installation -#### Recommended: Node.js http-server +#### Global Installation ```bash -# Install globally first -# With npm: npm install -g http-server -# With Yarn: yarn global add http-server +npm install -g text-cleanup +``` + +#### Local Installation +```bash +npm install text-cleanup +npx text-cleanup --help +``` + +#### From Source +```bash +git clone https://github.com/mopx/text-cleanup.git +cd text-cleanup +npm install +npm link # For global access +``` + +### CLI Usage + +```bash +text-cleanup [options] [input-file] [output-file] +``` + +#### Options + +- `-h, --help` - Show help message +- `-v, --version` - Show version information +- `-i, --input` - Input file path (default: stdin) +- `-o, --output` - Output file path (default: stdout) +- `--in-place` - Modify the input file in place + +#### Examples + +```bash +# Clean text from stdin to stdout +echo "This is **bold** text with 'smart quotes'." | text-cleanup + +# Clean a file and output to another file +text-cleanup messy-file.txt clean-file.txt + +# Clean a file in place +text-cleanup --in-place document.txt + +# Clean using explicit options +text-cleanup -i input.txt -o cleaned.txt + +# Use in a pipeline +curl -s https://example.com/api/text | text-cleanup | less +``` + +## Docker Usage + +The CLI tool is also available as a Docker container for easy deployment and usage without Node.js installation. -# Run the server -http-server -p 8000 +### Pull from Docker Hub (when available) +```bash +docker pull text-cleanup/cli ``` -#### VS Code Live Server (Recommended for VS Code users) +### Build from Source ```bash -# Install the Live Server extension in VS Code -# Right-click on index.html and select "Open with Live Server" +git clone https://github.com/mopx/text-cleanup.git +cd text-cleanup +docker build -t text-cleanup-cli . 
``` -#### Alternative: Python (optional) +### Docker Examples + ```bash -# Using Python 3 (if available) -python3 -m http.server 8000 +# Clean text from stdin +echo "Messy **formatted** text" | docker run --rm -i text-cleanup-cli + +# Clean a file (mount current directory) +docker run --rm -v $(pwd):/data text-cleanup-cli /data/input.txt /data/output.txt -# Using Python 2 (if available) -python -m SimpleHTTPServer 8000 +# Show help +docker run --rm text-cleanup-cli --help + +# Show version +docker run --rm text-cleanup-cli --version + +# Use as part of a pipeline +curl -s https://api.example.com/text | docker run --rm -i text-cleanup-cli | jq . +``` + +### Docker Compose + +Create a `docker-compose.yml` file: + +```yaml +version: '3.8' +services: + text-cleanup: + image: text-cleanup-cli + volumes: + - ./data:/data + command: ["/data/input.txt", "/data/output.txt"] +``` + +Run with: +```bash +docker-compose run --rm text-cleanup ``` ## Production Deployment -### Deploy to Vercel (Recommended) +### Web Application Deployment + +#### Deploy to Vercel (Recommended) -#### Method 1: Using Vercel CLI +##### Method 1: Using Vercel CLI 1. Install Vercel CLI: ```bash # Using npm @@ -115,43 +199,81 @@ vercel Follow the prompts to configure your deployment. -#### Method 2: Using Git Integration +##### Method 2: Using Git Integration 1. Connect your repository to Vercel at [vercel.com](https://vercel.com) 2. Import your GitHub repository 3. Vercel will automatically build and deploy your app 4. Any pushes to the main branch will trigger automatic redeployments -### Deploy to Other Platforms +#### Deploy to Other Platforms -#### Netlify +##### Netlify 1. Connect your repository to Netlify at [netlify.com](https://netlify.com) 2. Set build settings: - Build command: (leave empty or use `echo 'No build required'`) - Publish directory: `/` (root directory) -#### GitHub Pages +##### GitHub Pages 1. Go to your repository settings on GitHub 2. Navigate to Pages section 3. 
Select source branch (usually `main`) 4. Your app will be available at `https://username.github.io/text-cleanup` -#### Manual Deployment +##### Manual Deployment Since this is a static site, you can deploy it to any web server: 1. Upload `index.html`, `script.js`, `styles.css`, and any other assets to your web server 2. Ensure the server can serve static files 3. Access your domain to view the app +### CLI Distribution + +The CLI tool can be distributed via: + +- **npm**: Publish to npm registry for easy installation +- **Docker Hub**: Container images for cross-platform deployment +- **GitHub Releases**: Binary distributions for different platforms +- **Package Managers**: Submit to platform-specific package managers + ## Project Structure ``` text-cleanup/ -├── index.html # Main HTML file -├── script.js # JavaScript functionality -├── styles.css # CSS styles -├── package.json # Project configuration -├── vercel.json # Vercel deployment config -└── README.md # This file +├── bin/ +│ └── text-cleanup # CLI executable +├── lib/ +│ └── text-cleaner.js # Shared text cleaning logic +├── test-files/ # Test files for development +├── index.html # Web app main HTML +├── script.js # Web app JavaScript +├── styles.css # Web app styles +├── package.json # Node.js project configuration +├── Dockerfile # Docker container definition +├── .dockerignore # Docker build exclusions +├── vercel.json # Vercel deployment config +└── README.md # This file +``` + +## API Reference + +### TextCleaner Class + +The core text cleaning functionality is available as a Node.js module: + +```javascript +const TextCleaner = require('text-cleanup/lib/text-cleaner'); + +const cleaner = new TextCleaner(); +const cleaned = cleaner.cleanText("**Bold** text with 'smart quotes'."); +console.log(cleaned); // "Bold text with 'smart quotes'." 
``` +#### Methods + +- `cleanText(text)` - Main cleaning function that applies all cleaning methods +- `removeHiddenCharacters(text)` - Remove zero-width and invisible characters +- `normalizeWhitespace(text)` - Normalize spaces, quotes, and dashes +- `removeFormatting(text)` - Remove markdown and formatting artifacts +- `cleanSpecialCharacters(text)` - Clean problematic Unicode characters + ## Contributing We welcome contributions to the text-cleanup project! Here's how you can help: @@ -166,20 +288,29 @@ We welcome contributions to the text-cleanup project! Here's how you can help: ### Types of Contributions - 🐛 Bug fixes -✨ New features +✨ New features (web app, CLI, or Docker improvements) - 📚 Documentation improvements - 🎨 UI/UX enhancements - ⚡ Performance optimizations - 🧪 Tests +- 📦 Package management and distribution ### Development Guidelines - Keep the code simple and readable -- Test your changes thoroughly +- Test your changes thoroughly (both web and CLI versions) - Follow existing code style and conventions - Update documentation as needed +- Ensure Docker containers build and run correctly Feel free to open an issue if you have ideas, questions, or found a bug! +## Author + +Created by **[@mopx](https://github.com/mopx)** + +- GitHub: [github.com/mopx](https://github.com/mopx) +- Repository: [github.com/mopx/text-cleanup](https://github.com/mopx/text-cleanup) + ## License This project is licensed under the MIT License - see the details below: @@ -210,4 +341,4 @@ SOFTWARE. --- -**Made with ❤️ by the open source community. Contributions are always welcome!** +**Made with ❤️ by the open source community. 
Contributions are always welcome!**
\ No newline at end of file
diff --git a/bin/text-cleanup b/bin/text-cleanup
new file mode 100755
index 0000000..f3638d0
--- /dev/null
+++ b/bin/text-cleanup
@@ -0,0 +1,292 @@
+#!/usr/bin/env node
+
+/**
+ * Text Cleanup CLI Tool
+ * Command line version of the text-cleanup web application
+ */
+
+const fs = require('fs');
+const path = require('path');
+const TextCleaner = require('../lib/text-cleaner');
+
+// Help text
+const HELP_TEXT = `
+Text Cleanup CLI Tool
+
+Clean text from unwanted formatting, special characters, and hidden artifacts
+commonly found when copying from AI tools, PDFs, websites, and documents.
+
+Usage:
+  text-cleanup [options] [input-file] [output-file]
+
+Options:
+  -h, --help     Show this help message
+  -v, --version  Show version information
+  -i, --input    Input file path (default: stdin)
+  -o, --output   Output file path (default: stdout)
+  --in-place     Modify the input file in place
+
+Examples:
+  # Clean text from stdin to stdout
+  echo "messy text" | text-cleanup
+
+  # Clean a file and output to another file
+  text-cleanup input.txt output.txt
+
+  # Clean a file in place
+  text-cleanup --in-place messy-file.txt
+
+  # Clean using explicit options
+  text-cleanup -i input.txt -o cleaned.txt
+
+Privacy: All processing happens locally - no data is sent to any servers.
+`;
+
+class TextCleanupCLI {
+  constructor() {
+    this.cleaner = new TextCleaner();
+    this.args = process.argv.slice(2);
+    this.options = {
+      help: false,
+      version: false,
+      input: null,
+      output: null,
+      inPlace: false
+    };
+  }
+
+  /**
+   * Parse command line arguments into this.options.
+   *
+   * Positional arguments fill whichever of the input/output slots is still
+   * empty (input first), so a value given through -i/-o is never silently
+   * replaced by a positional argument.
+   */
+  parseArgs() {
+    const positionalArgs = [];
+
+    for (let i = 0; i < this.args.length; i++) {
+      const arg = this.args[i];
+
+      switch (arg) {
+        case '-h':
+        case '--help':
+          this.options.help = true;
+          break;
+
+        case '-v':
+        case '--version':
+          this.options.version = true;
+          break;
+
+        case '-i':
+        case '--input':
+          if (i + 1 < this.args.length) {
+            this.options.input = this.args[++i]; // consume the option's value
+          } else {
+            this.error('--input option requires a file path');
+          }
+          break;
+
+        case '-o':
+        case '--output':
+          if (i + 1 < this.args.length) {
+            this.options.output = this.args[++i]; // consume the option's value
+          } else {
+            this.error('--output option requires a file path');
+          }
+          break;
+
+        case '--in-place':
+          this.options.inPlace = true;
+          break;
+
+        default:
+          if (arg.startsWith('-')) {
+            this.error(`Unknown option: ${arg}`);
+          } else {
+            positionalArgs.push(arg);
+          }
+          break;
+      }
+    }
+
+    // --help/--version short-circuit everything else, so do not reject the
+    // rest of the command line before run() gets to print them.
+    if (this.options.help || this.options.version) {
+      return;
+    }
+
+    // Positionals fill the input slot first, then the output slot, skipping
+    // any slot already set through -i/-o; anything left over is an error.
+    for (const value of positionalArgs) {
+      if (this.options.input === null) {
+        this.options.input = value;
+      } else if (this.options.output === null) {
+        this.options.output = value;
+      } else {
+        this.error(`Unexpected argument: ${value}`);
+      }
+    }
+
+    // Validate options
+    if (this.options.inPlace && this.options.output) {
+      this.error('Cannot use --in-place with explicit output file');
+    }
+
+    if (this.options.inPlace && !this.options.input) {
+      this.error('--in-place requires an input file');
+    }
+
+    if (this.options.inPlace) {
+      this.options.output = this.options.input;
+    }
+  }
+
+  /**
+   * Show error message and exit
+   */
+  error(message) {
+    console.error(`Error: ${message}`);
+    console.error('Use --help for usage information');
+    process.exit(1);
+  }
+
+  /**
+   * Print the help text. Does not exit: run() returns afterwards so stdout
+   * can drain before the process ends.
+   */
+  showHelp() {
+    console.log(HELP_TEXT);
+  }
+
+  /**
+   * Print the version from package.json when readable. Does not exit.
+   */
+  showVersion() {
+    let version;
+    try {
+      const packagePath = path.join(__dirname, '../package.json');
+      version = JSON.parse(fs.readFileSync(packagePath, 'utf8')).version;
+    } catch (err) {
+      // Best effort: fall back to the bare tool name below
+    }
+    console.log(version ? `text-cleanup v${version}` : 'text-cleanup');
+  }
+
+  /**
+   * Read input text from file or stdin
+   * @returns {Promise<string>} The raw input text
+   */
+  async readInput() {
+    if (this.options.input) {
+      try {
+        return fs.readFileSync(this.options.input, 'utf8');
+      } catch (err) {
+        if (err.code === 'ENOENT') {
+          throw new Error(`Input file does not exist: ${this.options.input}`);
+        }
+        throw new Error(`Failed to read input file: ${err.message}`);
+      }
+    }
+
+    // Refuse to wait on an interactive terminal before touching the stream
+    if (process.stdin.isTTY) {
+      throw new Error('No input provided. Use --help for usage information.');
+    }
+
+    return new Promise((resolve, reject) => {
+      let input = '';
+      process.stdin.setEncoding('utf8');
+      process.stdin.on('data', (chunk) => {
+        input += chunk;
+      });
+      process.stdin.on('end', () => resolve(input));
+      process.stdin.on('error', (err) => {
+        reject(new Error(`Failed to read from stdin: ${err.message}`));
+      });
+    });
+  }
+
+  /**
+   * Write output text to file or stdout
+   * @returns {Promise<void>} Settles once the text has been written
+   */
+  async writeOutput(text) {
+    if (this.options.output) {
+      try {
+        // Ensure output directory exists
+        const outputDir = path.dirname(this.options.output);
+        if (outputDir !== '.' && !fs.existsSync(outputDir)) {
+          fs.mkdirSync(outputDir, { recursive: true });
+        }
+
+        fs.writeFileSync(this.options.output, text, 'utf8');
+      } catch (err) {
+        throw new Error(`Failed to write output file: ${err.message}`);
+      }
+      return;
+    }
+
+    if (text.length === 0) { // nothing to flush
+      return;
+    }
+
+    // Wait for the write callback so piped output is flushed before exit
+    await new Promise((resolve, reject) => {
+      process.stdout.write(text, (err) => {
+        if (err) {
+          reject(new Error(`Failed to write to stdout: ${err.message}`));
+        } else {
+          resolve();
+        }
+      });
+    });
+  }
+
+  /**
+   * Main execution function
+   *
+   * Success paths return instead of calling process.exit() so pending stdout
+   * writes are not cut short; failures set process.exitCode to 1.
+   */
+  async run() {
+    try {
+      this.parseArgs();
+
+      if (this.options.help) {
+        this.showHelp();
+        return;
+      }
+
+      if (this.options.version) {
+        this.showVersion();
+        return;
+      }
+
+      const inputText = await this.readInput();
+      const cleanedText = this.cleaner.cleanText(inputText);
+      await this.writeOutput(cleanedText);
+
+      // Show success message for file operations
+      if (this.options.output) {
+        if (this.options.inPlace) {
+          console.error(`✓ Cleaned text in place: ${this.options.input}`);
+        } else {
+          console.error(`✓ Cleaned text written to: ${this.options.output}`);
+        }
+      }
+    } catch (err) {
+      console.error(`Error: ${err.message}`);
+      process.exitCode = 1;
+    }
+  }
+}
+
+// Run the CLI if this file is executed directly
+if (require.main === module) {
+  const cli = new TextCleanupCLI();
+  cli.run();
+}
+
+module.exports = TextCleanupCLI;
\ No newline at end of file
diff --git a/lib/text-cleaner.js b/lib/text-cleaner.js
new file mode 100644
index 0000000..910fd91
--- /dev/null
+++ b/lib/text-cleaner.js
@@ -0,0 +1,105 @@
+/**
+ * Text Cleaning Library
+ * Extracted from the web version for reuse in CLI and other contexts
+ */
+
+class TextCleaner {
+  /**
+   * Main cleaning function that applies all cleaning methods
+   * @param {string} text - Input text to clean
+   * @returns {string} - Cleaned text ('' for non-string or blank input)
+   */
+  cleanText(text) {
+    if (!text || typeof text !== 'string') {
+      return '';
+    }
+
+    if (!text.trim()) {
+      return '';
+    }
+
+    let cleaned = text;
+
+    // Order matters: hidden characters first, special-character filter last
+    cleaned = this.removeHiddenCharacters(cleaned);
+    cleaned = this.normalizeWhitespace(cleaned);
+    cleaned = this.removeFormatting(cleaned);
+    cleaned = this.cleanSpecialCharacters(cleaned);
+
+    return cleaned;
+  }
+
+  /**
+   * Remove hidden and zero-width characters
+   * @param {string} text - Input text
+   * @returns {string} - Text with hidden characters removed
+   */
+  removeHiddenCharacters(text) {
+    return text
+      // Remove zero-width characters
+      .replace(/[\u200B-\u200D\uFEFF]/g, '')
+      // Remove other invisible characters
+      .replace(/[\u00AD\u2060\u2061\u2062\u2063\u2064]/g, '')
+      // Remove directional marks (LRM/RLM, embeddings/overrides, isolates)
+      .replace(/[\u200E\u200F\u202A-\u202E\u2066-\u2069]/g, '')
+      // Replace non-standard space and separator characters with a plain space
+      .replace(/[\u00A0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000]/g, ' ')
+      // Remove control characters (except tab, newline, carriage return)
+      .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/g, '');
+  }
+
+  /**
+   * Normalize whitespace and special characters
+   * @param {string} text - Input text
+   * @returns {string} - Text with normalized whitespace
+   */
+  normalizeWhitespace(text) {
+    return text
+      // Convert en dash, em dash and minus sign to a regular hyphen
+      .replace(/[\u2013\u2014\u2212]/g, '-')
+      // Convert smart quotes to regular quotes (escapes keep this encoding-proof)
+      .replace(/[\u201C\u201D]/g, '"')
+      .replace(/[\u2018\u2019]/g, "'")
+      // Normalize multiple spaces to single space (but preserve newlines)
+      .replace(/[ \t]+/g, ' ')
+      // Remove trailing spaces from lines
+      .replace(/[ \t]+$/gm, '')
+      // Remove leading spaces from lines (optional)
+      .replace(/^[ \t]+/gm, '');
+  }
+
+  /**
+   * Remove formatting artifacts like markdown
+   * @param {string} text - Input text
+   * @returns {string} - Text with formatting removed
+   */
+  removeFormatting(text) {
+    return text
+      // Remove common AI/formatting artifacts
+      .replace(/\*\*(.*?)\*\*/g, '$1') // Remove bold markdown
+      .replace(/\*(.*?)\*/g, '$1') // Remove italic markdown
+      .replace(/_(.*?)_/g, '$1') // Remove underline markdown
+      .replace(/`(.*?)`/g, '$1') // Remove code markdown
+      .replace(/~~(.*?)~~/g, '$1') // Remove strikethrough
+      // Remove other formatting characters that might be artifacts
+      .replace(/[\u2022\u25E6\u2043\u2219]/g, '\u2022') // Normalize bullet points
+      .replace(/[\u2013\u2014]/g, '-'); // Normalize dashes
+  }
+
+  /**
+   * Clean special characters that might cause issues
+   * @param {string} text - Input text
+   * @returns {string} - Text with special characters cleaned
+   */
+  cleanSpecialCharacters(text) {
+    return text
+      // Drop every character outside the allow-listed Unicode ranges below
+      .replace(/[^\x00-\x7F\u00A0-\u024F\u0400-\u04FF\u1E00-\u1EFF\u2000-\u206F\u20A0-\u20CF\u2100-\u214F\u2190-\u21FF]/g, '')
+      // Clean up any remaining multiple spaces (but preserve newlines)
+      .replace(/[ \t]+/g, ' ')
+      // Trim spaces from beginning and end of each line
+      .replace(/^[ \t]+|[ \t]+$/gm, '');
+  }
+}
+
+module.exports = TextCleaner;
\ No newline at end of file
diff --git a/package.json b/package.json
index 4e58945..50a4ac1 100644
--- a/package.json
+++ b/package.json
@@ -1,17 +1,33 @@
 {
   "name": "text-cleanup",
   "version": "1.0.0",
-  "description": "Special characters cleanup app",
-  "main": "index.html",
+  "description": "Clean text from unwanted formatting, special characters, and hidden artifacts. Available as web app and CLI tool.",
+  "main": "lib/text-cleaner.js",
+  "bin": {
+    "text-cleanup": "./bin/text-cleanup"
+  },
   "scripts": {
     "dev": "http-server -p 3000 -o",
     "dev:vercel": "vercel dev",
     "build": "echo 'No build step required'",
-    "start": "echo 'Static site - no start command needed'"
+    "start": "echo 'Static site - no start command needed'",
+    "cli": "node bin/text-cleanup",
+    "test": "echo 'No tests defined yet'"
   },
-  "keywords": ["text", "cleanup", "special-characters", "formatting"],
+  "keywords": ["text", "cleanup", "special-characters", "formatting", "cli", "command-line"],
   "author": "",
   "license": "MIT",
+  "engines": {
+    "node": ">=12.0.0"
+  },
+  "files": [
+    "bin/",
+    "lib/",
+    "index.html",
+    "script.js",
+    "styles.css",
+    "README.md"
+  ],
   "dependencies": {},
   "devDependencies": {}
 }
\ No newline at end of file