updated scripts
This commit is contained in:
+23
-6
@@ -2,6 +2,7 @@
|
||||
# collect-repos.sh
|
||||
# Run from ~/git/ - combines all files from each repo into a single .txt file
|
||||
# Output goes to ~/git/collected/ folder
|
||||
# Output files are UTF-8 with BOM for maximum compatibility
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
@@ -47,7 +48,7 @@ should_skip_file() {
|
||||
}
|
||||
|
||||
# Known text file extensions - always include these
|
||||
TEXT_EXTENSIONS=("sh" "bash" "zsh" "yaml" "yml" "json" "toml" "ini" "cfg" "conf" "txt" "md" "py" "js" "ts" "html" "css" "xml" "env" "service" "timer" "sql" "lua" "rb" "go" "rs" "java" "c" "h" "cpp" "hpp" "Makefile" "Dockerfile" "csv" "log" "properties" "rules")
|
||||
TEXT_EXTENSIONS=("sh" "bash" "zsh" "yaml" "yml" "json" "toml" "ini" "cfg" "conf" "txt" "md" "py" "js" "ts" "html" "css" "xml" "env" "service" "timer" "sql" "lua" "rb" "go" "mod" "sum" "rs" "java" "c" "h" "cpp" "hpp" "Makefile" "Dockerfile" "csv" "log" "properties" "rules")
|
||||
|
||||
is_known_text() {
|
||||
local file="$1"
|
||||
@@ -78,6 +79,20 @@ is_binary() {
|
||||
return 1
|
||||
}
|
||||
|
||||
#######################################
# Cat a file, stripping a leading UTF-8 BOM if present, so that BOMs from
# individual source files do not end up inline in the combined output.
# Arguments: $1 - path of the file to emit
# Outputs:   the file's bytes on stdout, minus a leading EF BB BF if present
# Returns:   status of the final tail/cat; when pipefail is active (the
#            script enables it), also non-zero if the header read fails
#######################################
cat_strip_bom() {
  local file="$1"
  local header

  # Render the first three bytes as contiguous lowercase hex (e.g. "efbbbf").
  # Files shorter than three bytes yield a shorter string and fall through
  # to the plain cat branch. "--" keeps a path that begins with "-" from
  # being parsed as an option.
  header=$(head -c 3 -- "$file" | od -A n -t x1 | tr -d ' \n') || return

  if [[ "$header" == "efbbbf" ]]; then
    # Start output at byte 4, i.e. skip the 3-byte BOM.
    tail -c +4 -- "$file"
  else
    cat -- "$file"
  fi
}
|
||||
|
||||
echo "=== Repo Collector ==="
|
||||
echo "Timestamp: $TIMESTAMP"
|
||||
echo "Working dir: $SCRIPT_DIR"
|
||||
@@ -132,14 +147,16 @@ for repo_dir in */; do
|
||||
updated_repos+=("$repo_dir")
|
||||
echo " Updating: ${repo_dir}/"
|
||||
|
||||
# Write header
|
||||
# Write UTF-8 BOM + header
|
||||
# printf writes raw bytes; echo writes the text header after it
|
||||
printf '\xEF\xBB\xBF' > "$output_file"
|
||||
{
|
||||
echo "================================================================================"
|
||||
echo "Repository: ${repo_dir}"
|
||||
echo "Collected: ${TIMESTAMP}"
|
||||
echo "================================================================================"
|
||||
echo ""
|
||||
} > "$output_file"
|
||||
} >> "$output_file"
|
||||
|
||||
# Find all files, excluding skip dirs
|
||||
# First pass: collect READMEs
|
||||
@@ -147,7 +164,7 @@ for repo_dir in */; do
|
||||
rel_path="${file#${repo_dir}/}"
|
||||
{
|
||||
echo "--- FILE: ${rel_path} ---"
|
||||
cat "$file"
|
||||
cat_strip_bom "$file"
|
||||
echo ""
|
||||
echo ""
|
||||
} >> "$output_file"
|
||||
@@ -200,10 +217,10 @@ for repo_dir in */; do
|
||||
continue
|
||||
fi
|
||||
|
||||
# Include the file
|
||||
# Include the file (strip BOM from source to avoid inline BOMs)
|
||||
{
|
||||
echo "--- FILE: ${rel_path} ---"
|
||||
cat "$file"
|
||||
cat_strip_bom "$file"
|
||||
echo ""
|
||||
echo ""
|
||||
} >> "$output_file"
|
||||
|
||||
Reference in New Issue
Block a user