aboutsummaryrefslogtreecommitdiff
blob: 3b62e226fdb0093e0da1cabc95f33278b5296ce5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/bin/bash
# Copyright 2014-2019 Ulrich Müller
# Distributed under the terms of the GNU GPL version 2 or later
# Author: Ulrich Müller <ulm@gentoo.org>

# The *( ) patterns used further down to parse "file" output need extglob
shopt -s extglob

# Ask Portage for the location of the "gentoo" repository and change into
# it. The result is checked explicitly: when the query fails or prints an
# empty path, bash may treat 'cd ""' as a successful no-op, and the scan
# would then silently run on whatever directory the caller happened to be in.
portdir=$(portageq get_repo_path / gentoo) || {
    echo "portageq failed to look up the gentoo repository" >&2
    exit 1
}
if [[ -z ${portdir} ]]; then
    echo "portageq returned an empty path for the gentoo repository" >&2
    exit 1
fi
cd "${portdir}" || exit 1

count=0

# Read "file -i" output lines ("<path>: <type>; charset=<charset>") from
# standard input and print a report line for every entry whose type is not
# accepted as text. The global ${count} is incremented once per reported
# entry. Expects "shopt -s extglob" to be in effect (set near the top of
# this script) for the *( ) pattern that strips the padding after the colon.
report_binaries() {
    local line path type size

    # IFS= and -r make read return the line verbatim. A plain "read" would
    # interpret backslashes, so a path containing one could no longer be
    # handed to sed or stat below.
    while IFS= read -r line; do
        # Split at the last colon: path in front of it, type behind it
        path=${line%:*}
        type=${line##*:*( )}
        case ${type} in
            "application/octet-stream; charset=binary" \
            | "application/octet-stream; charset=unknown" \
            | "binary; charset=binary")
                # GNU Info files (or patches to them) can contain the
                # following control characters that produce false positives:
                # - 0x1f, followed by LF or FF
                # - 0x7f (DEL), preceded by "Node:" or "Ref:" in the same line
                # Filter such characters and reiterate
                line=$(sed -e 's/\x1f\f\?$//;/\(Node\|Ref\):/s/\x7f//' \
                    "${path}" | file -i -)
                type=${line##*:*( )}
                ;;
        esac
        case ${type} in
            text/*) ;;                            # text file
            application/*"; charset=us-ascii") ;;
            application/*"; charset=utf-8") ;;
            "image/svg; charset=us-ascii") ;;     # SVG image
            "image/svg+xml; charset=us-ascii") ;; # SVG image
            "image/x-xpmi; charset=us-ascii") ;;  # XPM image
            "message/rfc822; charset=us-ascii") ;;
            *)
                # Anything else is reported together with its size in bytes
                size=$(stat -c "%s" "${path}")
                printf '%s\n' "${path#./}: ${type} (size=${size})"
                (( count++ ))
                ;;
        esac
    done
}

# Classify every non-directory below the current directory, skipping the
# pruned top-level directories and compressed Manifest files. The starting
# point "." is spelled out because only GNU find supplies it by default.
# Process substitution (rather than a pipe) keeps the function in the
# current shell, so its updates to ${count} survive.
report_binaries < <(find . \( -path ./distfiles -o -path ./local \
        -o -path ./metadata -o -path ./packages \) -prune \
        -o ! -type d ! -name 'Manifest*.gz' -exec file -ih '{}' + | sort)

[[ ${count} -gt 0 ]] || echo "No binary files found. :-)"