misc-scripts created
This commit is contained in:
@@ -0,0 +1,218 @@
|
|||||||
|
#!/bin/bash
#
# collect-repos.sh
#
# Combines the text files of every repository directory found in the current
# working directory into one .txt file per repository.
#
# Usage:  run from the directory that holds the repositories (e.g. ~/git/).
# Output: "collected/" folder next to this script (~/git/collected/ when the
#         script itself lives in ~/git/).

set -euo pipefail

# Absolute directory of this script.
#  - BASH_SOURCE is preferred over $0 because it stays correct when the file is
#    sourced; $0 remains the fallback when BASH_SOURCE is empty.
#  - CDPATH is cleared so `cd` can never echo a resolved path into the result.
#  - `--` protects against a path that starts with "-".
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" && pwd)"
readonly SCRIPT_DIR
readonly OUTPUT_DIR="${SCRIPT_DIR}/collected"

# One timestamp for the whole run, written into every output file header.
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
readonly TIMESTAMP

# Max file size to include (skip large binaries/assets)
readonly MAX_FILE_SIZE_KB=500

# Directory names whose contents are never collected.
readonly -a SKIP_DIRS=(
  ".git" "node_modules" "__pycache__" ".venv" "venv" ".cache"
)

# File-name glob patterns that are skipped without being listed in the output.
readonly -a SKIP_FILES=(
  "*.png" "*.jpg" "*.jpeg" "*.gif" "*.webp" "*.ico" "*.svg"
  "*.mp4" "*.mp3"
  "*.zip" "*.tar" "*.gz" "*.bz2" "*.7z"
  "*.bin" "*.exe" "*.so" "*.o" "*.pyc"
  "*.woff" "*.woff2" "*.ttf" "*.eot"
  "*.pdf"
)

mkdir -p -- "$OUTPUT_DIR"
|
||||||
|
|
||||||
|
#######################################
# Print a find(1) expression fragment that matches every directory named in
# SKIP_DIRS and everything beneath it, with the alternatives joined by "-o".
#
# The glob patterns are wrapped in literal single-quote characters, so the
# text is only meaningful where it is parsed again by a shell.
# NOTE(review): no caller of this function is visible in this file.
#
# Globals:   SKIP_DIRS (read)
# Arguments: none
# Outputs:   the expression on stdout (one line, with a leading space)
#######################################
build_skip_args() {
  local args=""
  local dir # kept local so the loop does not clobber a global "dir"
  for dir in "${SKIP_DIRS[@]}"; do
    args+=" -path '*/${dir}' -o -path '*/${dir}/*' -o"
  done
  # Drop the dangling " -o" left behind by the last iteration.
  printf '%s\n' "${args% -o}"
}
|
||||||
|
|
||||||
|
#######################################
# Tell whether a file's name matches one of the SKIP_FILES glob patterns.
# Only the last path component is compared; directories are ignored.
#
# Globals:   SKIP_FILES (read)
# Arguments: $1 - path of the file to test
# Returns:   0 if the file should be skipped, 1 otherwise
#######################################
should_skip_file() {
  local file="$1"
  local name pattern
  # Declared separately from the assignment so a basename failure is not
  # masked by `local`; `--` keeps a name starting with "-" from being read
  # as an option.
  name=$(basename -- "$file") || return 1
  for pattern in "${SKIP_FILES[@]}"; do
    # The pattern is deliberately unquoted so that it is used as a glob.
    # shellcheck disable=SC2254
    case "$name" in
      $pattern) return 0 ;;
    esac
  done
  return 1
}
|
||||||
|
|
||||||
|
# Extensions (text after the last "." of the file name) that are always
# treated as text. A name without any "." is compared as a whole, which is why
# bare names such as "Makefile" are listed here too.
TEXT_EXTENSIONS=(
  "sh" "bash" "zsh"
  "yaml" "yml" "json" "toml" "ini" "cfg" "conf"
  "txt" "md"
  "py" "js" "ts" "html" "css" "xml"
  "env" "service" "timer" "sql"
  "lua" "rb" "go" "rs" "java" "c" "h" "cpp" "hpp"
  "Makefile" "Dockerfile"
  "csv" "log" "properties" "rules"
)

#######################################
# Tell whether a file is text judging by its name alone (content is not read).
#
# A file qualifies when its name is one of a few well-known extension-less
# names (or starts with ".env"), or when the part after its last "." is listed
# in TEXT_EXTENSIONS. For a name that contains no "." the whole name is
# compared against that list.
#
# Globals:   TEXT_EXTENSIONS (read)
# Arguments: $1 - path of the file to test
# Returns:   0 if the name is recognised as text, 1 otherwise
#######################################
is_known_text() {
  local file="$1"
  local name ext text_ext
  # Declared separately from the assignment so a basename failure is not
  # masked by `local`; `--` keeps a name starting with "-" from being read
  # as an option.
  name=$(basename -- "$file") || return 1
  ext="${name##*.}"

  # Files without extension but known names
  case "$name" in
    Makefile | Dockerfile | Vagrantfile | Gemfile | Rakefile | .gitignore | .env*)
      return 0
      ;;
  esac

  for text_ext in "${TEXT_EXTENSIONS[@]}"; do
    [[ "$ext" == "$text_ext" ]] && return 0
  done
  return 1
}
|
||||||
|
|
||||||
|
#######################################
# Tell whether a file should be treated as binary.
#
# Decision order:
#   1. A name recognised by is_known_text is never binary.
#   2. An empty file is not binary.
#   3. file(1) reports the content encoding as "binary".
#   4. The first 512 bytes contain a NUL byte.
#
# Arguments: $1 - path of the file to test
# Returns:   0 if the file is binary, 1 otherwise
#######################################
is_binary() {
  local file="$1"
  local encoding total without_nul

  # Known text extensions always pass
  is_known_text "$file" && return 1

  # Nothing to classify in an empty (or missing) file.
  [[ -s "$file" ]] || return 1

  # -b suppresses the file name, so words such as "image" or "archive" in the
  # PATH can no longer cause a match. --mime-encoding classifies the content
  # itself, so a script described as "ASCII text executable" is not mistaken
  # for a binary. A failing/missing file(1) simply falls through to step 4.
  encoding=$(file -b --mime-encoding -- "$file" 2>/dev/null) || encoding=""
  if [[ "$encoding" == "binary" ]]; then
    return 0
  fi

  # NUL-byte check on the first 512 bytes: if deleting NULs shortens the
  # sample, at least one was present. Uses only head/tr/wc, no PCRE needed.
  total=$(head -c 512 -- "$file" 2>/dev/null | wc -c) || total=0
  without_nul=$(head -c 512 -- "$file" 2>/dev/null | tr -d '\000' | wc -c) \
    || without_nul=0
  if ((without_nul < total)); then
    return 0
  fi

  return 1
}
|
||||||
|
|
||||||
|
# Startup banner: the run's timestamp and the two directories in use,
# followed by one blank line.
printf '%s\n' \
  "=== Repo Collector ===" \
  "Timestamp: $TIMESTAMP" \
  "Working dir: $SCRIPT_DIR" \
  "Output dir: $OUTPUT_DIR" \
  ""
|
||||||
|
|
||||||
|
# --- Pull all repos first ---
# Runs `git pull` in every sub-directory of the current directory that has a
# .git directory. The "collected" output folder is left alone. A failed pull
# is reported on the same line and the run carries on.
echo "--- Pulling all repos ---"
for repo_dir in */; do
  repo_dir="${repo_dir%/}"
  if [[ "$repo_dir" == "collected" || ! -d "$repo_dir/.git" ]]; then
    continue
  fi

  printf '%s' " git pull ${repo_dir}... "
  if ! git -C "$repo_dir" pull --quiet 2>&1; then
    echo "FAILED (continuing anyway)"
  else
    echo "ok"
  fi
done
echo ""
|
||||||
|
|
||||||
|
# --- Collect every repo into <OUTPUT_DIR>/<repo>.txt ---
#
# Counters reported in the final summary.
repo_count=0
total_files=0
updated_repos=()

# find(1) predicates that keep excluded directories out of the file list.
# They are derived from SKIP_DIRS so that the configured list is the single
# source of truth. A plain file named ".git" is excluded as well.
find_excludes=('!' -path '*/.git')
for skip_dir in "${SKIP_DIRS[@]}"; do
  find_excludes+=('!' -path "*/${skip_dir}/*")
done

for repo_dir in */; do
  repo_dir="${repo_dir%/}"

  # Skip the output directory itself
  [[ "$repo_dir" == "collected" ]] && continue

  # Also guards against the unexpanded "*/" pattern when nothing matched.
  [[ -d "$repo_dir" ]] || continue

  repo_count=$((repo_count + 1))
  file_count=0
  skipped_count=0
  output_file="${OUTPUT_DIR}/${repo_dir}.txt"
  # Everything is written here first and renamed at the very end. An aborted
  # run therefore never leaves a truncated output file whose fresh mtime would
  # make later runs report "no changes".
  partial_file="${output_file}.partial"

  # Check if repo changed since last collection
  if [[ -f "$output_file" ]]; then
    output_mtime=$(stat -c %Y "$output_file" 2>/dev/null || echo 0)
    # Get latest commit timestamp in the repo
    repo_last_commit=$(git -C "$repo_dir" log -1 --format=%ct 2>/dev/null || echo 0)
    # The commit time alone misses commits that were created before the last
    # collection but pulled after it, so the HEAD reflog is checked as well:
    # git normally appends to .git/logs/HEAD whenever HEAD moves.
    # NOTE(review): a directory without git history always yields 0 here and
    # is never refreshed once its output file exists - confirm that is wanted.
    if [[ ! "$repo_dir/.git/logs/HEAD" -nt "$output_file" ]] \
      && [ "$repo_last_commit" -le "$output_mtime" ] 2>/dev/null; then
      echo " Skipping: ${repo_dir}/ (no changes)"
      continue
    fi
  fi

  updated_repos+=("$repo_dir")
  echo " Updating: ${repo_dir}/"

  # Write header
  {
    echo "================================================================================"
    echo "Repository: ${repo_dir}"
    echo "Collected: ${TIMESTAMP}"
    echo "================================================================================"
    echo ""
  } > "$partial_file"

  # Find all files, excluding skip dirs
  while IFS= read -r -d '' file; do
    # The inner expansion is quoted so glob characters in the directory name
    # are matched literally.
    rel_path="${file#"${repo_dir}"/}"

    # Skip anything with "secret" anywhere in its relative path
    # (case-insensitive); such files are not even listed in the output.
    if grep -qi -- 'secret' <<<"$rel_path"; then
      skipped_count=$((skipped_count + 1))
      continue
    fi

    # Skip by pattern
    if should_skip_file "$file"; then
      skipped_count=$((skipped_count + 1))
      continue
    fi

    # Skip large files. A file that disappeared or cannot be inspected is
    # counted as skipped instead of aborting the whole run.
    if ! file_size_kb=$(du -k -- "$file" 2>/dev/null | cut -f1) \
      || [[ -z "$file_size_kb" ]]; then
      printf 'warn: cannot determine size of %s, skipping\n' "$file" >&2
      skipped_count=$((skipped_count + 1))
      continue
    fi
    if [ "$file_size_kb" -gt "$MAX_FILE_SIZE_KB" ]; then
      {
        echo "--- FILE: ${rel_path} ---"
        echo "[SKIPPED: file too large (${file_size_kb}KB > ${MAX_FILE_SIZE_KB}KB limit)]"
        echo ""
      } >> "$partial_file"
      skipped_count=$((skipped_count + 1))
      continue
    fi

    # Skip binary files
    if is_binary "$file"; then
      {
        echo "--- FILE: ${rel_path} ---"
        echo "[SKIPPED: binary file]"
        echo ""
      } >> "$partial_file"
      skipped_count=$((skipped_count + 1))
      continue
    fi

    # Include the file
    {
      echo "--- FILE: ${rel_path} ---"
      cat -- "$file"
      echo ""
      echo ""
    } >> "$partial_file"

    file_count=$((file_count + 1))
  done < <(find "$repo_dir" -type f "${find_excludes[@]}" -print0 | sort -z)

  # Append summary at the end
  {
    echo "================================================================================"
    echo "Summary: ${file_count} files included, ${skipped_count} skipped"
    echo "================================================================================"
  } >> "$partial_file"

  # Publish the finished file in one step.
  mv -f -- "$partial_file" "$output_file"

  total_files=$((total_files + file_count))
  echo " -> ${output_file} (${file_count} files, ${skipped_count} skipped)"
done
|
||||||
|
|
||||||
|
# Final report: overall totals, then either the list of output files that were
# rewritten during this run or a note that nothing changed.
printf '\n%s\n' "Done! ${repo_count} repos found, ${#updated_repos[@]} updated, ${total_files} total files collected."
if ((${#updated_repos[@]} == 0)); then
  echo "All repos up to date, nothing to upload."
else
  printf '\n%s\n' "Updated repos (upload these to project):"
  for repo in "${updated_repos[@]}"; do
    printf ' -> %s/%s.txt\n' "$OUTPUT_DIR" "$repo"
  done
fi
|
||||||
Reference in New Issue
Block a user