#!/bin/bash

# Copyright (C) 2024 Pädagogisches Landesinstitut Rheinland-Pfalz
# Copyright (C) 2024 Daniel Teichmann <daniel.teichmann@das-netzwerkteam.de>
# Copyright (C) 2016 by Mike Gabriel <mike.gabriel@it-zukunft-schule.de>
# Adapted for Debian Edu Router. Original origin:
# https://code.it-zukunft-schule.de/cgit/itzks-systems/tree/sbin/e2guardian-update-blacklists

# This script is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.

# Stop at the first failing command; a pipeline counts as failed as soon as
# one of its stages fails.
set -e -o pipefail

# Drop any proxy settings inherited from the environment.
unset http_proxy https_proxy ftp_proxy

# Directory holding the "work-in-progress" marker file while this script runs.
FILTERLISTS_PATH='/var/lib/debian-edu-router/filterlists.d/'
# Cleanup handler: deletes the "work-in-progress" marker file so that it does
# not stay behind when the script stops (e.g. after a crash).
finish() {
	local marker_file="${FILTERLISTS_PATH}/work-in-progress"

	rm -f "${marker_file}"
}
# Run the handler whenever the shell exits.
trap finish EXIT

common_file="/usr/share/debian-edu-router/debian-edu-router.common"

# Load common functions, variables and stuff.
# Helpers used further down (notice_log, error_log, manage_unit) are not
# defined in this file; presumably they come from the common file.
if [ -s "$common_file" ]; then
	# shellcheck source=/dev/null
	source "$common_file"
else
	# The logging helpers are unavailable at this point, so report on stderr
	# with plain printf. The path is passed as a quoted argument.
	printf "Could not load common file at '%s'.\n" "$common_file" >&2
	# NOTE(review): a missing common file ends the script with status 0, i.e.
	# "success". Kept as-is because it looks deliberate - confirm.
	exit 0
fi

# Archive the blacklists are downloaded from.
BLACKLIST_DL_URI='http://dsi.ut-capitole.fr/blacklists/download/blacklists_for_dansguardian.tar.gz'

# Header templates for the category selection files.
TEMPLATES_DIR='/usr/share/debian-edu-router/templates'
# Location of the blacklisted_categories / whitelisted_categories files.
SELECT_CATEGORIES_DIR='/etc/debian-edu-router/e2guardian.d/selected_categories'

# Working area: downloaded tarball, unpacked lists and the update timestamp.
WORK_DIR='/var/lib/debian-edu-router/d-e-r-p.c-f/blacklists_working_dir'
# Generated list files consisting of .Include<...> lines.
LISTS_DIR='/var/lib/debian-edu-router/d-e-r-p.c-f/blacklists.d'

# Fetch the blacklists tarball into "${WORK_DIR}/blacklists.tar.gz".
#
# Globals read:
#   REMOVE_BLACKLIST  "true": only delete tarball and timestamp, then return.
#   FORCE_DOWNLOAD    non-empty: download even if the last update is recent.
#   SKIP_DOWNLOAD     non-empty: skip the download.
#   WORK_DIR, BLACKLIST_DL_URI
# Globals written:
#   SKIP_DOWNLOAD     set to "true" when the last update is less than 12h old
#                     (stays set for the rest of the script run).
# Returns:
#   0 when the tarball was downloaded or the download was skipped,
#   1 when the download failed; a previously downloaded tarball is kept then.
function download_blacklists() {
	if [ "$REMOVE_BLACKLIST" = "true" ]; then
		rm -fv "${WORK_DIR}/last-updated.txt"  || true
		rm -fv "${WORK_DIR}/blacklists.tar.gz" || true
		return
	fi

	local tarball="${WORK_DIR}/blacklists.tar.gz"
	local stamp_file="${WORK_DIR}/last-updated.txt"
	local last_update=""
	local date_diff=""

	# The download below writes into WORK_DIR; make sure it exists, this
	# function runs before anything else has had a chance to create it.
	mkdir -p "${WORK_DIR}"

	# Do not spam-download from Uni Toulouse (allow once every 12h).
	if [ -e "${stamp_file}" ]; then
		# An empty or unparsable timestamp file must not crash the arithmetic
		# below; it is treated like a missing timestamp instead.
		last_update="$(date -f "${stamp_file}" +%s 2>/dev/null)" || last_update=""
		if [[ "${last_update}" =~ ^[0-9]+$ ]]; then
			date_diff=$(( ($(date +%s) - last_update) / (60*60) ))
			if [ "$date_diff" -lt 12 ]; then
				SKIP_DOWNLOAD="true"
			fi
		fi
	fi

	if [ -n "$SKIP_DOWNLOAD" ] && [ -z "$FORCE_DOWNLOAD" ]; then
		notice_log "Skipping downloading of blacklists, last update was $date_diff hour(s) ago."
		return 0
	fi

	notice_log "Downloading blacklists from '$BLACKLIST_DL_URI'..."

	# Download into a side file and only replace the tarball once curl reported
	# success. --fail turns HTTP errors into a non-zero exit status instead of
	# storing the server's error page as "tarball".
	if ! curl --fail --silent --location --output "${tarball}.part" "${BLACKLIST_DL_URI}"; then
		rm -f "${tarball}.part"
		error_log "Downloading blacklists from '$BLACKLIST_DL_URI' failed."
		return 1
	fi
	mv -f "${tarball}.part" "${tarball}"
}

# Unpack the downloaded tarball and make it the current
# "${WORK_DIR}/blacklists" tree; the previous tree is kept as
# "${WORK_DIR}/blacklists.old".
#
# Globals read:
#   REMOVE_BLACKLIST  "true": only delete the unpacked trees, then return.
#   WORK_DIR
# Calls download_blacklists (forced) when the tarball cannot be unpacked.
# Exits the script with status 1 if no usable tarball could be obtained.
function update_blacklists() {
	if [ "$REMOVE_BLACKLIST" = "true" ]; then
		rm -Rfv "${WORK_DIR}/.blacklists.new" || true
		rm -Rfv "${WORK_DIR}/blacklists.old"  || true
		rm -Rfv "${WORK_DIR}/blacklists"      || true
		return
	fi

	local tarball="${WORK_DIR}/blacklists.tar.gz"
	local staging="${WORK_DIR}/.blacklists.new"
	local stamp_file="${WORK_DIR}/last-updated.txt"
	local max_downloads=3
	local downloads=0

	notice_log "Updating blacklists..."
	mkdir -p "${WORK_DIR}/"

	while true; do
		# Start every attempt from an empty staging directory so that leftovers
		# of a failed extraction never end up in the final tree.
		rm -Rf "${staging}"
		mkdir -p "${staging}"

		if tar -xzf "${tarball}" -C "${staging}"; then
			break
		fi

		# Maybe CTRL-C'd process, tar-ball broken?
		# Allow 3 re-downloads; each one is followed by another extraction
		# attempt before giving up.
		if [ "$downloads" -ge "$max_downloads" ]; then
			error_log "Could not download blacklists tar ball!"
			exit 1
		fi
		downloads=$((downloads + 1))
		# A failed download must not end the script here (set -e), the loop
		# decides when to give up.
		FORCE_DOWNLOAD=true download_blacklists || true
	done

	# The archive wraps everything in a top-level "blacklists/" directory;
	# lift its content one level up.
	mv "${staging}/blacklists/"* "${staging}/"
	rmdir "${staging}/blacklists/"

	rm -Rf "${WORK_DIR}/blacklists.old"

	if [ -d "${WORK_DIR}/blacklists" ]; then
		mv "${WORK_DIR}/blacklists" "${WORK_DIR}/blacklists.old"
	fi

	if [ ! -e "${WORK_DIR}/blacklists" ]; then
		mv "${staging}" "${WORK_DIR}/blacklists"

		# Only record a new "last updated" time when the tarball is newer than
		# the recorded one. Re-unpacking an old tarball must not postpone the
		# next download (download_blacklists compares against this file).
		# LC_ALL (not just LANG) forces the C format that `date -f` can parse.
		if [ ! -e "${stamp_file}" ] || [ "${tarball}" -nt "${stamp_file}" ]; then
			LC_ALL=C date 1> "${stamp_file}"
		fi

		chown root:root -Rf "${WORK_DIR}/blacklists"
		chmod -Rf a+r "${WORK_DIR}/blacklists"
		# Directories additionally need the x bit to be traversable.
		find "${WORK_DIR}/blacklists" -mindepth 1 -type d -exec chmod a+x {} +
	fi
}

# Split pure whitelist categories off the freshly unpacked blacklists tree.
#
# A category directory is moved from "${WORK_DIR}/blacklists" to
# "${WORK_DIR}/whitelists" when its "usage" file contains "white" but not
# "black". The previous whitelists tree is kept as "${WORK_DIR}/whitelists.old".
#
# Globals read:
#   REMOVE_BLACKLIST  "true": only delete the whitelists trees, then return.
#   WORK_DIR
function update_whitelists() {
	if [ "$REMOVE_BLACKLIST" = "true" ]; then
		rm -Rfv "${WORK_DIR}/whitelists"      || true
		rm -Rfv "${WORK_DIR}/whitelists.old"  || true
		return
	fi

	local usage_file=""
	local category_dir=""

	notice_log "Updating whitelists..."

	rm -Rf "${WORK_DIR}/whitelists.old"
	if [ -d "${WORK_DIR}/whitelists" ]; then
		mv "${WORK_DIR}/whitelists" "${WORK_DIR}/whitelists.old"
	fi

	mkdir -p "${WORK_DIR}/whitelists"

	for usage_file in "${WORK_DIR}"/blacklists/*/usage; do
		# Covers two cases that must not abort the script: the glob matched
		# nothing (the pattern is then kept literally), and a symlinked
		# category whose target has already been moved away by this loop.
		[ -f "${usage_file}" ] || continue

		category_dir="${usage_file%/usage}"

		# skip symlinked dirs
		if [ -h "${category_dir}" ]; then
			continue
		fi

		if grep -q "white" < "${usage_file}" && ! grep -q "black" < "${usage_file}"; then
			mv "${category_dir}" "${WORK_DIR}/whitelists"
		fi
	done
}

# Create the category selection files if they do not exist yet.
#
# Each file starts with its header template from TEMPLATES_DIR, followed by
# one line per category directory currently present in
# "${WORK_DIR}/blacklists" resp. "${WORK_DIR}/whitelists". Existing selection
# files are never touched.
#
# Globals read:
#   REMOVE_BLACKLIST  "true": delete the selection files instead, then return.
#   SELECT_CATEGORIES_DIR, TEMPLATES_DIR, WORK_DIR
function init_config() {
	if [ "$REMOVE_BLACKLIST" = "true" ]; then
		# Do not remove templates, they are needed for re-creation.
		rm -fv "${SELECT_CATEGORIES_DIR}/blacklisted_categories" || true
		rm -fv "${SELECT_CATEGORIES_DIR}/whitelisted_categories" || true
		rmdir "${SELECT_CATEGORIES_DIR}"                         || true
		return
	fi

	local kind=""
	local selection_file=""

	notice_log "Initializing config..."

	mkdir -p "${SELECT_CATEGORIES_DIR}"

	for kind in blacklist whitelist; do
		selection_file="${SELECT_CATEGORIES_DIR}/${kind}ed_categories"
		if [ -e "${selection_file}" ]; then
			continue
		fi

		cp "${TEMPLATES_DIR}/header_${kind}ed_categories" "${selection_file}"

		# List exactly the direct sub-directories (= categories). Searching
		# the parent directory instead of a "dir/"* glob keeps this working
		# when there is no category at all, and -maxdepth 1 relative to the
		# parent keeps directories nested inside a category out of the list.
		find "${WORK_DIR}/${kind}s" -mindepth 1 -maxdepth 1 -type d \
			| awk -F '/' '{print $NF}' \
			| sort >> "${selection_file}"
	done
}

# Helper for rearrange_lists: move every category named in a selection file
# from one tree to the other.
#   $1  selection file (one category name per line)
#   $2  directory the categories are taken from
#   $3  directory the categories are moved into
function _move_selected_categories() {
	local selection_file="${1}"
	local from_dir="${2}"
	local to_dir="${3}"
	local category=""

	# "|| [ -n ... ]" also processes a last line without trailing newline.
	while read -r category || [ -n "${category}" ]; do
		case "${category}" in
			# Blank lines would make "${from_dir}/" itself match and move the
			# whole tree; comments, "." / ".." and anything containing a slash
			# are no category names either.
			''|'#'*|.|..|*/*)
				continue
				;;
		esac

		if [ -d "${from_dir}/${category}" ] && \
		   [ ! -h "${from_dir}/${category}" ]; then
			mv "${from_dir}/${category}" "${to_dir}"
		fi
	done < "${selection_file}"
}

# Make the unpacked trees follow the selection files: categories listed in
# whitelisted_categories are moved from blacklists to whitelists, categories
# listed in blacklisted_categories are moved from whitelists to blacklists.
# Symlinked category directories are never moved.
#
# Globals read:
#   REMOVE_BLACKLIST  "true": do nothing.
#   SELECT_CATEGORIES_DIR, WORK_DIR
function rearrange_lists() {
	if [ "$REMOVE_BLACKLIST" = "true" ]; then
		return
	fi

	notice_log "Rearranging lists..."

	_move_selected_categories "${SELECT_CATEGORIES_DIR}/whitelisted_categories" \
		"${WORK_DIR}/blacklists" "${WORK_DIR}/whitelists"

	_move_selected_categories "${SELECT_CATEGORIES_DIR}/blacklisted_categories" \
		"${WORK_DIR}/whitelists" "${WORK_DIR}/blacklists"
}

# (Re)generate the include lines of one list file in LISTS_DIR.
#
# Arguments:
#   $1  action: "blacklist" or "whitelist"; selects "${WORK_DIR}/${1}s" and
#       the selection file "${1}ed_categories"
#   $2  type: name of the per-category file to include (e.g. "domains")
#   $3  list: name of the list file inside LISTS_DIR
# Globals read:
#   REMOVE_BLACKLIST  "true": delete LISTS_DIR instead, then return.
#   LISTS_DIR, WORK_DIR, SELECT_CATEGORIES_DIR
#
# All existing include lines pointing into the action's tree are dropped.
# Then one commented "#.Include<path>" line per category file is appended,
# and the lines of the categories named in the selection file are uncommented.
function update_lists() {
	if [ "$REMOVE_BLACKLIST" = "true" ]; then
		rm -Rfv "${LISTS_DIR}" || true
		return
	fi

	local action="${1}"
	local type="${2}"
	local list="${3}"
	local list_file="${LISTS_DIR}/${list}"
	local path=""
	local category=""
	local -a activate_cmds=()

	notice_log "Updating $action '$list' with type '$type'..."

	mkdir -p "${LISTS_DIR}"
	touch "${list_file}"

	# Remove all commented + uncommented include's with $action in path.
	sed -i -E -e "/(#|).Include<.*\/${action}s\/.*>$/d" "${list_file}"

	# find fails when the glob matches nothing (no category ships a file of
	# this type). That is a valid state and must not abort the script under
	# "set -eo pipefail", hence the "|| true".
	{ find "${WORK_DIR}/${action}s/"*"/${type}" -maxdepth 1 -type f 2>/dev/null || true; } \
		| sort \
		| while IFS= read -r path; do
			# ignore symlinks pointing to another category dir
			if [ ! -h "$(dirname "${path}")" ]; then
				# Echo unselected include statement.
				echo "#.Include<${path}>"
			fi
		done >> "${list_file}"

	# Activate previously unselected categories (if selected by admin).
	# The selection file is read directly instead of through "grep -v": grep
	# returns 1 when every line is a comment, which would abort the script.
	while read -r category || [ -n "${category}" ]; do
		case "${category}" in
			# Skip blank lines and comments; a name containing "/" cannot be a
			# category directory and would break the sed expression.
			''|'#'*|*/*)
				continue
				;;
		esac
		activate_cmds+=(-e "s/#(.Include<.*\/${category}\/${type})/\1/")
	done < "${SELECT_CATEGORIES_DIR}/${action}ed_categories"

	# One sed run applies all substitutions; each line is handled
	# independently, so the result equals running them one after another.
	if [ "${#activate_cmds[@]}" -gt 0 ]; then
		sed -i -E "${activate_cmds[@]}" "${list_file}"
	fi
}

# Log the command line synopsis via notice_log and end the script
# successfully (exit status 0).
function print_usage() {
	local usage_line=""

	for usage_line in \
		"Usage: $0 [-r|--remove-blacklist]" \
		"Default behavior (without arguments) is to just download and update blacklists."
	do
		notice_log "${usage_line}"
	done

	exit 0
}

# Parse the command line arguments of the script.
#
# Arguments: the script's positional parameters ("$@").
# Globals written:
#   REMOVE_BLACKLIST  set to "true" for -r / --remove-blacklist.
# Exits with status 0 after printing the usage for -h / --help and with
# status 1 (usage printed as well) for an unknown argument.
function parse_arguments() {
	# Parse command line arguments
	while [[ $# -gt 0 ]]; do
		case "$1" in
			-r|--remove-blacklist)
				REMOVE_BLACKLIST=true
				shift
				;;
			-h|--help)
				print_usage
				# print_usage ends the script itself; make sure of it here so
				# this loop can never spin on an unshifted argument.
				exit 0
				;;
			*)
				echo "$0 Unknown argument: $1" >&2
				# print_usage ends the shell it runs in with status 0. Run it
				# in a subshell so that the failure status below is really
				# what the caller gets to see.
				( print_usage ) || true
				exit 1
				;;
		esac
	done
}

# Do not let Squid-ACL-watcher reload all instances of Squid again and again.
# The marker file is removed again further down or, on any other way out of
# the script, by the EXIT trap.
# LC_ALL=C is used because "c" is not a locale name and a plain LANG setting
# is overridden by LC_ALL / LC_TIME from the environment.
LC_ALL=C date > "${FILTERLISTS_PATH}/work-in-progress"

# With REMOVE_BLACKLIST=true (set by parse_arguments) every step below only
# deletes the files it is responsible for.
parse_arguments "$@"
download_blacklists
update_blacklists
update_whitelists
init_config
rearrange_lists
update_lists blacklist domains bannedsitelist
update_lists blacklist urls bannedurllist
update_lists blacklist expressions bannedregexpurllist
#update_lists whitelist domains exceptionsitelist
#update_lists whitelist urls exceptionurllist

if [ "$REMOVE_BLACKLIST" = "true" ]; then
	# Remove the directories themselves, but only if they are empty by now;
	# a failing rmdir is ignored.
	rmdir "${LISTS_DIR}"             || true
	rmdir "${WORK_DIR}"              || true
	rmdir "${SELECT_CATEGORIES_DIR}" || true

	notice_log "Removed all blacklist files..."
	exit 0
fi

# Squid may be reloaded now too :)
rm -f "${FILTERLISTS_PATH}/work-in-progress"
# A failure to start the watcher is ignored on purpose (|| true).
systemctl start squid_d-e-r_acl_watcher.service 1>/dev/null || true

# manage_unit is not defined in this file.
manage_unit restart e2guardian_d-e-r.service
