updated scripts

This commit is contained in:
2026-02-16 08:11:47 +01:00
parent 49e8a9794a
commit 571362d9fb
2 changed files with 245 additions and 6 deletions
+23 -6
View File
@@ -2,6 +2,7 @@
# collect-repos.sh
# Run from ~/git/ - combines all files from each repo into a single .txt file
# Output goes to ~/git/collected/ folder
# Output files are UTF-8 with BOM for maximum compatibility
set -euo pipefail
@@ -47,7 +48,7 @@ should_skip_file() {
}
# Known text file extensions - always include these
TEXT_EXTENSIONS=("sh" "bash" "zsh" "yaml" "yml" "json" "toml" "ini" "cfg" "conf" "txt" "md" "py" "js" "ts" "html" "css" "xml" "env" "service" "timer" "sql" "lua" "rb" "go" "rs" "java" "c" "h" "cpp" "hpp" "Makefile" "Dockerfile" "csv" "log" "properties" "rules")
TEXT_EXTENSIONS=("sh" "bash" "zsh" "yaml" "yml" "json" "toml" "ini" "cfg" "conf" "txt" "md" "py" "js" "ts" "html" "css" "xml" "env" "service" "timer" "sql" "lua" "rb" "go" "mod" "sum" "rs" "java" "c" "h" "cpp" "hpp" "Makefile" "Dockerfile" "csv" "log" "properties" "rules")
is_known_text() {
local file="$1"
@@ -78,6 +79,20 @@ is_binary() {
return 1
}
# Print a file to stdout, dropping a leading UTF-8 BOM if one is present.
# Keeps BOMs from source files out of the middle of the combined output.
#
# Arguments: $1 - path of the file to print
# Outputs:   the file's bytes on stdout, minus a leading EF BB BF sequence
# Returns:   exit status of the command that emitted the content
cat_strip_bom() {
    local file="$1"
    local header

    # The file is always fed through stdin rather than passed as an operand:
    # a name starting with '-' would otherwise be parsed as an option by
    # head/tail/cat, and a file literally named '-' would be read as stdin.
    # Hex-dump the first three bytes so they can be compared as plain text;
    # files shorter than three bytes simply yield a shorter (or empty) string.
    header=$(head -c 3 < "$file" | od -A n -t x1 | tr -d ' \n')

    if [[ "$header" == "efbbbf" ]]; then
        # UTF-8 BOM found: start output at byte 4, i.e. skip the 3 BOM bytes.
        tail -c +4 < "$file"
    else
        cat < "$file"
    fi
}
echo "=== Repo Collector ==="
echo "Timestamp: $TIMESTAMP"
echo "Working dir: $SCRIPT_DIR"
@@ -132,14 +147,16 @@ for repo_dir in */; do
updated_repos+=("$repo_dir")
echo " Updating: ${repo_dir}/"
# Write header
# Write UTF-8 BOM + header
# printf writes raw bytes; echo writes the text header after it
printf '\xEF\xBB\xBF' > "$output_file"
{
echo "================================================================================"
echo "Repository: ${repo_dir}"
echo "Collected: ${TIMESTAMP}"
echo "================================================================================"
echo ""
} > "$output_file"
} >> "$output_file"
# Find all files, excluding skip dirs
# First pass: collect READMEs
@@ -147,7 +164,7 @@ for repo_dir in */; do
rel_path="${file#${repo_dir}/}"
{
echo "--- FILE: ${rel_path} ---"
cat "$file"
cat_strip_bom "$file"
echo ""
echo ""
} >> "$output_file"
@@ -200,10 +217,10 @@ for repo_dir in */; do
continue
fi
# Include the file
# Include the file (strip BOM from source to avoid inline BOMs)
{
echo "--- FILE: ${rel_path} ---"
cat "$file"
cat_strip_bom "$file"
echo ""
echo ""
} >> "$output_file"