#!/bin/bash
# collect-repos.sh
#
# Provenance: added as a new file in commit
# fdfa87a54b7e158ce5a614d7f9f074a3db30441a ("misc-scripts created"),
# author kisfenyo, Thu Feb 12 11:57:07 2026 +0100.
#
# Run from ~/git/ - combines all files from each repo (each sub-directory of
# the current working directory) into a single .txt file per repo.
# Output goes to the collected/ folder next to this script
# (~/git/collected/ when the script itself lives in ~/git/).
#
# Relies on GNU-style tools: find (-prune, -newer, -quit, -print0), sort -z,
# file (-b --mime-encoding) and bash >= 4 (mapfile, ${var,,}).

set -euo pipefail
# Unmatched globs expand to nothing, so an empty working directory produces
# no loop iterations instead of one iteration over the literal string "*/".
shopt -s nullglob

SCRIPT_DIR="$(cd "$(dirname -- "$0")" && pwd)"
OUTPUT_DIR="${SCRIPT_DIR}/collected"
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')

# Max file size to include (skip large binaries/assets)
MAX_FILE_SIZE_KB=500

# Directory names that are never descended into, and file-name globs that are
# silently skipped.
SKIP_DIRS=(".git" "node_modules" "__pycache__" ".venv" "venv" ".cache")
SKIP_FILES=("*.png" "*.jpg" "*.jpeg" "*.gif" "*.webp" "*.ico" "*.svg" "*.mp4" "*.mp3" "*.zip" "*.tar" "*.gz" "*.bz2" "*.7z" "*.bin" "*.exe" "*.so" "*.o" "*.pyc" "*.woff" "*.woff2" "*.ttf" "*.eot" "*.pdf")

# Known text file extensions - always include these
TEXT_EXTENSIONS=("sh" "bash" "zsh" "yaml" "yml" "json" "toml" "ini" "cfg" "conf" "txt" "md" "py" "js" "ts" "html" "css" "xml" "env" "service" "timer" "sql" "lua" "rb" "go" "rs" "java" "c" "h" "cpp" "hpp" "Makefile" "Dockerfile" "csv" "log" "properties" "rules")

# Path of the output file currently being written (empty when idle); removed
# by cleanup() if the script exits before the file is moved into place.
TMP_OUTPUT=""

#######################################
# Remove a half-written temporary output file, if any.
# Globals: TMP_OUTPUT (read)
#######################################
cleanup() {
  if [[ -n "${TMP_OUTPUT:-}" ]]; then
    rm -f -- "$TMP_OUTPUT"
  fi
}

#######################################
# Print the find(1) expression matching every entry named after a SKIP_DIRS
# element, one argument per line ("-path", "*/name", "-o", ...), so callers
# can load it into an array with mapfile instead of re-parsing a string.
# Globals: SKIP_DIRS (read)
# Outputs: the expression arguments on stdout, without surrounding parentheses
#######################################
build_skip_args() {
  local dir first=1
  for dir in "${SKIP_DIRS[@]}"; do
    if (( first )); then
      first=0
    else
      printf '%s\n' '-o'
    fi
    printf '%s\n' '-path' "*/${dir}"
  done
}

#######################################
# Run find over a repo with every SKIP_DIRS entry pruned (not descended into).
# Arguments: $1 - directory to search; remaining args - find expression that
#            is applied to the entries that were not pruned
# Outputs:   whatever the supplied find expression prints
#######################################
list_repo_files() {
  local repo="$1"
  shift
  local -a skip_args=()
  mapfile -t skip_args < <(build_skip_args)
  if (( ${#skip_args[@]} > 0 )); then
    find "$repo" \( "${skip_args[@]}" \) -prune -o "$@"
  else
    find "$repo" "$@"
  fi
}

#######################################
# Check if a file name matches one of the SKIP_FILES globs (case-insensitive).
# Arguments: $1 - file path
# Returns:   0 if the file must be skipped, 1 otherwise
#######################################
should_skip_file() {
  local name="${1##*/}" pattern
  name="${name,,}"
  for pattern in "${SKIP_FILES[@]}"; do
    # The pattern is deliberately unquoted so it acts as a glob.
    # shellcheck disable=SC2254
    case "$name" in
      ${pattern,,}) return 0 ;;
    esac
  done
  return 1
}

#######################################
# Decide from the name alone whether a file is treated as text.
# A name without a dot is compared as a whole against TEXT_EXTENSIONS, which
# is why entries such as "Makefile" appear in that list.
# Arguments: $1 - file path
# Returns:   0 for a well-known text name or extension, 1 otherwise
#######################################
is_known_text() {
  local name="${1##*/}" ext text_ext
  ext="${name##*.}"
  # Files without extension but known names
  case "$name" in
    Makefile|Dockerfile|Vagrantfile|Gemfile|Rakefile|.gitignore|.env*) return 0 ;;
  esac
  for text_ext in "${TEXT_EXTENSIONS[@]}"; do
    if [[ "$ext" == "$text_ext" ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Check if a file is binary.
# Arguments: $1 - file path
# Returns:   0 if binary, 1 if text (known text names, empty files and files
#            that cannot be inspected count as text)
#######################################
is_binary() {
  local file="$1" encoding head_bytes text_bytes

  # Known text extensions always pass
  if is_known_text "$file"; then
    return 1
  fi
  # An empty file has no binary content (file(1) would label it "binary").
  [[ -s "$file" ]] || return 1

  # A NUL byte in the first 512 bytes marks the file as binary. Counting bytes
  # before/after stripping NULs avoids depending on grep -P.
  head_bytes=$(head -c 512 -- "$file" | wc -c) || return 1
  text_bytes=$(head -c 512 -- "$file" | tr -d '\000' | wc -c) || return 1
  if (( text_bytes < head_bytes )); then
    return 0
  fi

  # Ask file(1) for the encoding only (-b omits the file name), so neither the
  # file's own name nor words like "executable" in the description of a shell
  # script can trigger a false positive.
  encoding=$(file -b --mime-encoding -- "$file" 2>/dev/null) || encoding=""
  [[ "$encoding" == "binary" ]]
}

#######################################
# Print a file's size in KB (byte count rounded up), independent of how many
# disk blocks the filesystem allocated for it.
# Arguments: $1 - file path
# Outputs:   the size in KB on stdout
# Returns:   non-zero if the file cannot be read
#######################################
file_size_kb() {
  local bytes
  bytes=$({ wc -c < "$1"; } 2>/dev/null) || return 1
  printf '%d\n' $(( (bytes + 1023) / 1024 ))
}

#######################################
# Tell whether a repo needs to be collected again.
# Any non-pruned file or directory modified after the stamp file counts as a
# change; this covers pulled commits regardless of their commit date,
# uncommitted edits, added/removed entries and directories without git data.
# Arguments: $1 - repo directory, $2 - previously written output file
# Returns:   0 if changed (or no previous output / find failed), 1 if not
#######################################
repo_changed_since() {
  local repo="$1" stamp="$2" hit
  [[ -f "$stamp" ]] || return 0
  hit=$(list_repo_files "$repo" -newer "$stamp" -print -quit 2>/dev/null) || return 0
  [[ -n "$hit" ]]
}

#######################################
# Write the combined dump of one repo.
# Globals:   TIMESTAMP, MAX_FILE_SIZE_KB (read);
#            COLLECT_INCLUDED, COLLECT_SKIPPED (written: per-repo counters)
# Arguments: $1 - repo directory, $2 - file to (over)write
#######################################
collect_repo() {
  local repo_dir="$1" dest="$2"
  local file rel_path size_kb
  COLLECT_INCLUDED=0
  COLLECT_SKIPPED=0

  {
    # Write header
    echo "================================================================================"
    echo "Repository: ${repo_dir}"
    echo "Collected: ${TIMESTAMP}"
    echo "================================================================================"
    echo ""

    # Find all files, excluding skip dirs
    while IFS= read -r -d '' file; do
      # The inner quotes keep glob characters in the repo name literal.
      rel_path="${file#"${repo_dir}"/}"

      # Skip files with "secret" in the name
      if [[ "${rel_path,,}" == *secret* ]]; then
        COLLECT_SKIPPED=$((COLLECT_SKIPPED + 1))
        continue
      fi

      # Skip by pattern
      if should_skip_file "$file"; then
        COLLECT_SKIPPED=$((COLLECT_SKIPPED + 1))
        continue
      fi

      # A file that cannot be read must not abort the whole run.
      if ! size_kb=$(file_size_kb "$file"); then
        echo "--- FILE: ${rel_path} ---"
        echo "[SKIPPED: unreadable file]"
        echo ""
        COLLECT_SKIPPED=$((COLLECT_SKIPPED + 1))
        continue
      fi

      # Skip large files
      if (( size_kb > MAX_FILE_SIZE_KB )); then
        echo "--- FILE: ${rel_path} ---"
        echo "[SKIPPED: file too large (${size_kb}KB > ${MAX_FILE_SIZE_KB}KB limit)]"
        echo ""
        COLLECT_SKIPPED=$((COLLECT_SKIPPED + 1))
        continue
      fi

      # Skip binary files
      if is_binary "$file"; then
        echo "--- FILE: ${rel_path} ---"
        echo "[SKIPPED: binary file]"
        echo ""
        COLLECT_SKIPPED=$((COLLECT_SKIPPED + 1))
        continue
      fi

      # Include the file
      echo "--- FILE: ${rel_path} ---"
      cat -- "$file" || echo "[ERROR: file could not be read completely]"
      echo ""
      echo ""
      COLLECT_INCLUDED=$((COLLECT_INCLUDED + 1))
    done < <(list_repo_files "$repo_dir" -type f -print0 | sort -z)

    # Append summary at the end
    echo "================================================================================"
    echo "Summary: ${COLLECT_INCLUDED} files included, ${COLLECT_SKIPPED} skipped"
    echo "================================================================================"
  } > "$dest"
}

#######################################
# Pull every git repo below the current directory, then (re)collect each
# sub-directory whose content changed since its last collection.
# Globals: OUTPUT_DIR, SCRIPT_DIR, TIMESTAMP (read); TMP_OUTPUT (written)
#######################################
main() {
  local repo_dir repo output_file
  local repo_count=0 total_files=0
  local -a updated_repos=()

  mkdir -p -- "$OUTPUT_DIR"
  trap cleanup EXIT

  echo "=== Repo Collector ==="
  echo "Timestamp: $TIMESTAMP"
  echo "Working dir: $SCRIPT_DIR"
  echo "Output dir: $OUTPUT_DIR"
  echo ""

  # --- Pull all repos first ---
  echo "--- Pulling all repos ---"
  for repo_dir in */; do
    repo_dir="${repo_dir%/}"
    # NOTE(review): skipping by name only protects the output folder when the
    # script is run from SCRIPT_DIR - confirm that is the intended setup.
    [[ "$repo_dir" == "collected" ]] && continue
    [[ -d "$repo_dir/.git" ]] || continue
    echo -n " git pull ${repo_dir}... "
    if git -C "$repo_dir" pull --quiet 2>&1; then
      echo "ok"
    else
      echo "FAILED (continuing anyway)"
    fi
  done
  echo ""

  for repo_dir in */; do
    repo_dir="${repo_dir%/}"

    # Skip the output directory itself
    [[ "$repo_dir" == "collected" ]] && continue

    # Only directories are processed; they do not have to be git repos
    [[ -d "$repo_dir" ]] || continue

    repo_count=$((repo_count + 1))
    output_file="${OUTPUT_DIR}/${repo_dir}.txt"

    # Check if repo changed since last collection
    if ! repo_changed_since "$repo_dir" "$output_file"; then
      echo " Skipping: ${repo_dir}/ (no changes)"
      continue
    fi

    updated_repos+=("$repo_dir")
    echo " Updating: ${repo_dir}/"

    # Write to a temporary file and move it into place afterwards, so an
    # aborted run never leaves a truncated output that looks up to date.
    TMP_OUTPUT="${output_file}.tmp.$$"
    collect_repo "$repo_dir" "$TMP_OUTPUT"
    mv -f -- "$TMP_OUTPUT" "$output_file"
    TMP_OUTPUT=""

    total_files=$((total_files + COLLECT_INCLUDED))
    echo " -> ${output_file} (${COLLECT_INCLUDED} files, ${COLLECT_SKIPPED} skipped)"
  done

  echo ""
  echo "Done! ${repo_count} repos found, ${#updated_repos[@]} updated, ${total_files} total files collected."
  if [[ ${#updated_repos[@]} -gt 0 ]]; then
    echo ""
    echo "Updated repos (upload these to project):"
    for repo in "${updated_repos[@]}"; do
      echo " -> ${OUTPUT_DIR}/${repo}.txt"
    done
  else
    echo "All repos up to date, nothing to upload."
  fi
}

main "$@"