#!/bin/bash

# Copyright (C) 2024-2025 Pädagogisches Landesinstitut Rheinland-Pfalz
# Copyright (C) 2024-2025 Daniel Teichmann <daniel.teichmann@das-netzwerkteam.de>
# Copyright (C) 2016-2025 Mike Gabriel <mike.gabriel@it-zukunft-schule.de>
# Adapted for Debian Edu Router. Original source:
# https://code.it-zukunft-schule.de/cgit/itzks-systems/tree/sbin/e2guardian-update-blacklists

# This script is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.

# Abort on the first failing command, also when it fails inside a pipeline.
set -e -o pipefail

# Drop any proxy settings inherited from the environment.
unset http_proxy https_proxy ftp_proxy

FILTERLISTS_PATH="/var/lib/debian-edu-router/filterlists.d/"

# EXIT handler: removes the 'work-in-progress' marker file, so that it does
# not stay behind when the script terminates (including on errors).
finish() {
	rm -f "${FILTERLISTS_PATH}/work-in-progress"
}
trap 'finish' EXIT

common_file="/usr/share/debian-edu-router/debian-edu-router.common"

# Load common functions, variables and stuff.
# Without a non-empty common file the script only prints a message and stops.
# NOTE(review): it stops with exit status 0 -- presumably deliberate so that
# callers do not fail; confirm.
if [ ! -s "${common_file}" ]; then
	echo "Could not load common file at ${common_file}."
	exit 0
fi
source "${common_file}"

# All state of the Toulouse filterlists shares one directory and one name
# prefix; both are only needed to assemble the paths below.
_der_pcf_dir="/var/lib/debian-edu-router/d-e-r-p.c-f"
_toulouse_prefix="filterlists-toulouse.e2guardian"

# Generated include lists (symlinked into the e2guardian configuration).
LISTS_DIR="${_der_pcf_dir}/${_toulouse_prefix}.d"
# Downloaded tar ball, timestamp file and the extracted black-/whitelists.
WORK_DIR="${_der_pcf_dir}/${_toulouse_prefix}.working_dir"
# Admin-editable selection files (blacklisted_categories/whitelisted_categories).
SELECTED_CATEGORIES_DIR="${_der_pcf_dir}/${_toulouse_prefix}.selected_categories"
unset _der_pcf_dir _toulouse_prefix

# Header templates and default selections used to create the selection files.
TEMPLATES_DIR="/usr/share/debian-edu-router/templates"

# NOTE(review): plain HTTP download -- confirm whether the server offers HTTPS.
BLACKLIST_DL_URI="http://dsi.ut-capitole.fr/blacklists/download/blacklists_for_dansguardian.tar.gz"

###############################################################################
# Function: download_blacklists
# Download the blacklists tar ball from BLACKLIST_DL_URI to
# ${WORK_DIR}/blacklists.tar.gz, or delete tar ball and timestamp file when
# REMOVE_FILTERLISTS is "true".
# The download is skipped if ${WORK_DIR}/last-updated.txt is less than 12
# hours old, unless FORCE_DOWNLOAD is non-empty. The data is first written
# to a '.part' file and only moved over the tar ball after curl succeeded,
# so a failed download never destroys the previous tar ball.
# Globals:
#   WORK_DIR, BLACKLIST_DL_URI, REMOVE_FILTERLISTS (read)
#   SKIP_DOWNLOAD, FORCE_DOWNLOAD (read, non-empty means "set")
# Arguments:
#   None
# Returns:
#   0 on success or when the download was skipped, curl's exit status
#   if the download failed.
###############################################################################
function download_blacklists() {
	local tarball="${WORK_DIR}/blacklists.tar.gz"
	local partial="${WORK_DIR}/blacklists.tar.gz.part"
	local stamp_file="${WORK_DIR}/last-updated.txt"
	local skip_download="${SKIP_DOWNLOAD:-}"
	local last_update=""
	local date_diff=""
	local rc=0

	if [ "${REMOVE_FILTERLISTS}" = "true" ]; then
		rm -fv "${stamp_file}" || true
		rm -fv "${tarball}"    || true
		# Left-over of an interrupted download, would block a later rmdir.
		rm -f  "${partial}"    || true
		return 0
	fi

	# Do not spam-download from Uni Toulouse (allow once every 12h).
	if [ -e "${stamp_file}" ]; then
		# An empty or garbled timestamp file must not abort the script,
		# it simply does not count as a recent update.
		if last_update="$(date -f "${stamp_file}" +%s 2>/dev/null)" && \
		   [[ "${last_update}" =~ ^[0-9]+$ ]]; then
			date_diff=$(( ($(date +%s) - last_update) / (60*60) ))
			if [ "${date_diff}" -lt 12 ]; then
				skip_download="true"
			fi
		else
			notice_log "Ignoring unreadable timestamp in '${stamp_file}'."
		fi
	fi

	if [ -n "${skip_download}" ] && [ -z "${FORCE_DOWNLOAD}" ]; then
		notice_log "Skipping downloading of blacklists, last update was ${date_diff} hour(s) ago."
		return 0
	fi

	notice_log "Downloading blacklists from '${BLACKLIST_DL_URI}'..."
	# --fail: treat HTTP errors as failure instead of saving the error page.
	# --location: follow redirects.
	curl --fail --location --silent --show-error "${BLACKLIST_DL_URI}" 1> "${partial}" || rc=$?
	if [ "${rc}" -ne 0 ]; then
		rm -f "${partial}"
		error_log "Downloading blacklists from '${BLACKLIST_DL_URI}' failed (curl exit code ${rc})."
		return "${rc}"
	fi

	mv -f "${partial}" "${tarball}"
}

###############################################################################
# Function: update_blacklists
# Extract ${WORK_DIR}/blacklists.tar.gz and install its 'blacklists/'
# directory as ${WORK_DIR}/blacklists; the previous directory is kept as
# ${WORK_DIR}/blacklists.old. If the tar ball cannot be extracted it is
# re-downloaded (forced) up to 3 times. With REMOVE_FILTERLISTS="true" the
# extracted directories are deleted instead.
# ${WORK_DIR}/last-updated.txt receives the modification time of the tar
# ball, i.e. the time of the download. Writing the current time here would
# push the 12h download limit forward on every run, even when the download
# was skipped.
# Globals:
#   WORK_DIR, REMOVE_FILTERLISTS (read)
# Arguments:
#   None
# Returns:
#   0 on success; exits the script with status 1 if no extractable tar
#   ball could be obtained.
###############################################################################
function update_blacklists() {
	local tarball="${WORK_DIR}/blacklists.tar.gz"
	local staging_dir="${WORK_DIR}/.blacklists.new"
	local download_attempts=0

	if [ "${REMOVE_FILTERLISTS}" = "true" ]; then
		rm -Rfv "${WORK_DIR}/.blacklists.new" || true
		rm -Rfv "${WORK_DIR}/blacklists.old"  || true
		rm -Rfv "${WORK_DIR}/blacklists"      || true
		return
	fi

	notice_log "Updating blacklists..."

	# Always start from an empty staging directory.
	rm -Rf "${staging_dir}"
	mkdir -p "${staging_dir}"

	while ! tar -xzf "${tarball}" -C "${staging_dir}"; do
		# Maybe CTRL-C'd process, tar-ball broken?
		# Allow 3 download attempts, each of them gets extracted.
		if [ "${download_attempts}" -ge 3 ]; then
			error_log "Could not download blacklists tar ball!"
			exit 1;
		fi
		download_attempts=$((download_attempts + 1))

		# Drop whatever the failed extraction left behind.
		rm -Rf "${staging_dir}"
		mkdir -p "${staging_dir}"

		FORCE_DOWNLOAD=true download_blacklists
	done

	# The tar ball wraps everything in 'blacklists/', strip that level.
	mv "${staging_dir}/blacklists/"* "${staging_dir}/"
	rmdir "${staging_dir}/blacklists/"

	if [ -e "${WORK_DIR}/blacklists.old" ]; then
		rm -Rf "${WORK_DIR}/blacklists.old"
	fi

	if [ -d "${WORK_DIR}/blacklists" ]; then
		mv "${WORK_DIR}/blacklists" "${WORK_DIR}/blacklists.old"
	fi

	if [ ! -e "${WORK_DIR}/blacklists" ]; then
		mv "${staging_dir}" "${WORK_DIR}/blacklists"

		# Timestamp of the download, not of this run (see header).
		LANG=C.UTF-8 date -r "${tarball}" 1> "${WORK_DIR}/last-updated.txt"

		chown root:root -Rf "${WORK_DIR}/blacklists"
		chmod -Rf a+r "${WORK_DIR}/blacklists"
		# Make all category directories traversable for everyone.
		find "${WORK_DIR}/blacklists" -mindepth 1 -type d -exec chmod a+x {} +
	fi
}

###############################################################################
# Function: update_whitelists
# Rotate ${WORK_DIR}/whitelists to ${WORK_DIR}/whitelists.old and move every
# category directory from ${WORK_DIR}/blacklists whose 'usage' file mentions
# "white" but not "black" into a fresh ${WORK_DIR}/whitelists directory.
# With REMOVE_FILTERLISTS="true" both whitelist directories are deleted.
# Globals:
#   WORK_DIR, REMOVE_FILTERLISTS (read)
# Arguments:
#   None
# Returns:
#   0 on success
###############################################################################
function update_whitelists() {
	local usage_file
	local category_dir

	if [ "${REMOVE_FILTERLISTS}" = "true" ]; then
		rm -Rfv "${WORK_DIR}/whitelists"      || true
		rm -Rfv "${WORK_DIR}/whitelists.old"  || true
		return
	fi

	notice_log "Updating whitelists..."

	if [ -d "${WORK_DIR}/whitelists.old" ]; then
		rm -Rf "${WORK_DIR}/whitelists.old"
	fi
	if [ -d "${WORK_DIR}/whitelists" ]; then
		mv "${WORK_DIR}/whitelists" "${WORK_DIR}/whitelists.old"
	fi

	mkdir -p "${WORK_DIR}/whitelists"

	# The glob is expanded once, before anything gets moved, so categories
	# that vanish while looping are detected by the checks below.
	for usage_file in "${WORK_DIR}"/blacklists/*/usage; do
		category_dir="${usage_file%/*}"

		# skip symlinked dirs
		if [ -h "${category_dir}" ]; then
			continue
		fi

		# Unmatched glob (no category has a usage file) or the file is gone.
		if [ ! -f "${usage_file}" ]; then
			continue
		fi

		if grep -q "white" < "${usage_file}" && ! grep -q "black" < "${usage_file}"; then
			mv "${category_dir}" "${WORK_DIR}/whitelists"
		fi
	done
}

###############################################################################
# Function: _create_category_selection
# Create a selection file: copy of a header template followed by one line per
# category directory; categories not listed in the defaults file are
# commented out with a leading '#'. The file is assembled under a temporary
# name and only moved into place when complete.
# Arguments:
#   $1 - header template file
#   $2 - directory whose direct sub-directories are the categories
#        (symlinks are ignored)
#   $3 - file listing the categories that are selected by default
#   $4 - selection file to create
# Returns:
#   0 on success, 1 if the header could not be copied or no category
#   directory was found (the selection file is not created then).
###############################################################################
function _create_category_selection() {
	local header_file="${1}"
	local categories_dir="${2}"
	local defaults_file="${3}"
	local target_file="${4}"
	local tmp_file="${target_file}.new"
	local category_dir
	local category
	local found="false"

	cp "${header_file}" "${tmp_file}" || return 1

	for category_dir in "${categories_dir}"/*; do
		if [ ! -d "${category_dir}" ] || [ -h "${category_dir}" ]; then
			continue
		fi
		category="${category_dir##*/}"
		found="true"

		# Fixed-string, whole-line match: category names are not regexes.
		if grep -qxF -- "${category}" "${defaults_file}"; then
			printf '%s\n' "${category}"
		else
			printf '#%s\n' "${category}"
		fi
	done >> "${tmp_file}"

	if [ "${found}" != "true" ]; then
		rm -f "${tmp_file}"
		error_log "No categories found in '${categories_dir}', not creating '${target_file}'."
		return 1
	fi

	mv -f "${tmp_file}" "${target_file}"
}

###############################################################################
# Function: init_config
# Create the selection files 'blacklisted_categories' and
# 'whitelisted_categories' in SELECTED_CATEGORIES_DIR if they do not exist
# yet. Existing selection files are never modified.
# With REMOVE_FILTERLISTS="true" nothing is created: the selection files
# are deleted if PURGE_FILTERLISTS is "true" as well, otherwise they are
# kept and only a notice is logged.
# NOTE(review): the default selections are read from
# ${TEMPLATES_DIR}/blacklists-toulouse.e2guardian.selected_categories/ while
# all other paths use the 'filterlists-toulouse' prefix -- confirm against
# the installed templates.
# Globals:
#   SELECTED_CATEGORIES_DIR, TEMPLATES_DIR, WORK_DIR,
#   REMOVE_FILTERLISTS, PURGE_FILTERLISTS (read)
# Arguments:
#   None
# Returns:
#   0 on success, 1 if a selection file could not be created
###############################################################################
function init_config() {
	local defaults_dir="${TEMPLATES_DIR}/blacklists-toulouse.e2guardian.selected_categories"

	if [ "${REMOVE_FILTERLISTS}" = "true" ]; then
		if [ "${PURGE_FILTERLISTS}" = "true" ]; then
			# Do not remove templates, they are needed for re-creation.
			rm -fv "${SELECTED_CATEGORIES_DIR}/blacklisted_categories" || true
			rm -fv "${SELECTED_CATEGORIES_DIR}/whitelisted_categories" || true
			rmdir  "${SELECTED_CATEGORIES_DIR}"                        || true
		elif [[ -d "${SELECTED_CATEGORIES_DIR}" ]]; then
			notice_log "Selection files for blacklist/whitelist categories won't be deleted. Please purge the content filter package to fully remove them."
		fi
		# The lists are gone at this point, there is nothing to initialize from.
		return 0
	fi

	notice_log "Initializing config..."

	mkdir -p "${SELECTED_CATEGORIES_DIR}"

	if [ ! -e "${SELECTED_CATEGORIES_DIR}/blacklisted_categories" ]; then
		_create_category_selection \
			"${TEMPLATES_DIR}/header_blacklisted_categories" \
			"${WORK_DIR}/blacklists" \
			"${defaults_dir}/default_selected_blacklisted_categories" \
			"${SELECTED_CATEGORIES_DIR}/blacklisted_categories" || return 1
	fi
	if [ ! -e "${SELECTED_CATEGORIES_DIR}/whitelisted_categories" ]; then
		_create_category_selection \
			"${TEMPLATES_DIR}/header_whitelisted_categories" \
			"${WORK_DIR}/whitelists" \
			"${defaults_dir}/default_selected_whitelisted_categories" \
			"${SELECTED_CATEGORIES_DIR}/whitelisted_categories" || return 1
	fi
}

###############################################################################
# Function: _move_selected_categories
# Move every category named in a selection file from one list directory to
# another. Blank lines, lines starting with '#' and entries that are not a
# plain directory name are skipped. Skipping blank lines matters: an empty
# name would address the list directory itself and move it as a whole.
# Arguments:
#   $1 - selection file (one category per line)
#   $2 - directory the categories are taken from
#   $3 - directory the categories are moved into
# Returns:
#   0 on success
###############################################################################
function _move_selected_categories() {
	local selection_file="${1}"
	local source_dir="${2}"
	local target_dir="${3}"
	local category

	# '|| [ -n ... ]' also processes a last line without trailing newline.
	while read -r category || [ -n "${category}" ]; do
		case "${category}" in
			''|'#'*|*/*|.|..)
				continue
				;;
		esac

		if [ -d "${source_dir}/${category}" ] && \
		   [ ! -h "${source_dir}/${category}" ]; then
			# Trailing slash: fail instead of renaming if the target is missing.
			mv "${source_dir}/${category}" "${target_dir}/"
		fi
	done < "${selection_file}"
}

###############################################################################
# Function: rearrange_lists
# Move categories listed in 'whitelisted_categories' from the blacklists to
# the whitelists directory, then categories listed in
# 'blacklisted_categories' from the whitelists to the blacklists directory.
# Symlinked category directories are never moved. Does nothing when
# REMOVE_FILTERLISTS is "true".
# Globals:
#   WORK_DIR, SELECTED_CATEGORIES_DIR, REMOVE_FILTERLISTS (read)
# Arguments:
#   None
# Returns:
#   0 on success
###############################################################################
function rearrange_lists() {
	if [ "${REMOVE_FILTERLISTS}" = "true" ]; then
		return
	fi

	notice_log "Rearranging lists..."

	_move_selected_categories \
		"${SELECTED_CATEGORIES_DIR}/whitelisted_categories" \
		"${WORK_DIR}/blacklists" \
		"${WORK_DIR}/whitelists"

	_move_selected_categories \
		"${SELECTED_CATEGORIES_DIR}/blacklisted_categories" \
		"${WORK_DIR}/whitelists" \
		"${WORK_DIR}/blacklists"
}

###############################################################################
# Function: update_lists
# (Re-)generate the include statements of one e2guardian list file in
# LISTS_DIR and symlink that file into the e2guardian configuration.
# Every ${WORK_DIR}/<action>s/<category>/<type> file gets one line:
#   .Include<path>     if <category> is active in '<action>ed_categories'
#   #.Include<path>    otherwise
# Lines of the list file that are not include statements for this action
# are kept. Symlinked category directories are ignored.
# With REMOVE_FILTERLISTS="true" the list file and its symlink are deleted.
# Globals:
#   WORK_DIR, LISTS_DIR, SELECTED_CATEGORIES_DIR, REMOVE_FILTERLISTS (read)
# Arguments:
#   $1 - action: "blacklist" or "whitelist"
#   $2 - type: name of the list file inside a category, e.g. "domains"
#   $3 - list: name of the e2guardian list file, e.g. "bannedsitelist"
# Returns:
#   exit status of the final 'ln' (or 'rm' in removal mode)
###############################################################################
function update_lists() {
	local action="${1}"
	local type="${2}"
	local list="${3}"
	local list_file="${LISTS_DIR}/${list}"
	local selection_file="${SELECTED_CATEGORIES_DIR}/${action}ed_categories"
	local path
	local category
	local -A selected=()

	if [ "${REMOVE_FILTERLISTS}" = "true" ]; then
		rm -fv "${LISTS_DIR}/${list}" || true
		rm -fv "/etc/debian-edu-router/e2guardian.d/lists/common/${list}"
		return
	fi

	notice_log "Updating $action '$list' with type '$type'..."

	mkdir -p "${LISTS_DIR}"
	touch "${list_file}"

	# Remove all commented + uncommented include's with $action in path.
	sed -i "${list_file}" -Ee "/(#|).Include<.*\/${action}s\/.*>$/d"

	# Collect the categories activated by the admin (non-comment lines).
	if [ -r "${selection_file}" ]; then
		while read -r category || [ -n "${category}" ]; do
			case "${category}" in
				''|'#'*)
					continue
					;;
			esac
			selected["${category}"]=1
		done < "${selection_file}"
	else
		notice_log "Selection file '${selection_file}' is not readable, no category gets activated."
	fi

	# A glob without match stays literal and is dropped by the -f test, so
	# a missing list type is not an error.
	for path in "${WORK_DIR}/${action}s/"*"/${type}"; do
		if [ ! -f "${path}" ] || [ -h "${path}" ]; then
			continue
		fi
		# ignore symlinks pointing to another category dir
		if [ -h "${path%/*}" ]; then
			continue
		fi
		printf '%s\n' "${path}"
	done | sort | while IFS= read -r path; do
		category="${path%/*}"
		category="${category##*/}"
		if [[ -n "${selected[${category}]+x}" ]]; then
			printf '%s\n' ".Include<${path}>"
		else
			# Echo unselected include statement.
			printf '%s\n' "#.Include<${path}>"
		fi
	done >> "${list_file}"

	ln -sf "${LISTS_DIR}/${list}" "/etc/debian-edu-router/e2guardian.d/lists/common/"
}

# Log the command line synopsis via notice_log, then terminate the script
# with exit status 0.
function print_usage() {
	local usage_line

	for usage_line in \
		"Usage: $0 [[-e|--enable-filterlists]|[-r|--remove-filterlists]|[-p|--purge-filterlists]]" \
		"Default behavior (without arguments) is to just download and update filterlists if already present."; do
		notice_log "${usage_line}"
	done

	exit 0
}

###############################################################################
# Function: parse_arguments
# Parse the command line arguments of the script.
# Globals:
#   ENABLE_FILTERLISTS - set to "true" by -e|--enable-filterlists
#   REMOVE_FILTERLISTS - set to "true" by -r|--remove-filterlists and
#                        -p|--purge-filterlists
#   PURGE_FILTERLISTS  - set to "true" by -p|--purge-filterlists
# Arguments:
#   The command line arguments ("$@")
# Returns:
#   0 on success. Exits with status 0 after printing the usage for
#   -h|--help and with status 1 for an unknown argument.
###############################################################################
function parse_arguments() {
	while [[ $# -gt 0 ]]; do
		case "${1}" in
			-e|--enable-filterlists)
				ENABLE_FILTERLISTS=true
				;;
			-p|--purge-filterlists)
				REMOVE_FILTERLISTS=true
				PURGE_FILTERLISTS=true
				;;
			-r|--remove-filterlists)
				REMOVE_FILTERLISTS=true
				;;
			-h|--help)
				print_usage
				;;
			*)
				echo "$0 Unknown argument: $1" >&2
				# print_usage ends the shell it runs in with status 0.
				# Confine it to a subshell, otherwise the 'exit 1' below
				# is never reached and a wrong invocation looks successful.
				( print_usage ) || true
				exit 1
				;;
		esac
		shift
	done
}

###############################################################################
# Function: migrate_data
# Handle data created by previous version of this script:
# filterlists at the paths used before 2.13.0~beta3 are deleted, the
# selection files are moved from their old location to
# SELECTED_CATEGORIES_DIR. If SELECTED_CATEGORIES_DIR already exists the
# old selection directory is left untouched instead of being moved *into*
# the existing directory.
# Globals:
#   SELECTED_CATEGORIES_DIR
# Arguments:
#   None
# Returns:
#   None
###############################################################################
function migrate_data() {
	local old_path
	local old_selection_dir="/etc/debian-edu-router/e2guardian.d/selected_categories"

	## pre-2.13.0~beta3:

	if [[ -d "/var/lib/debian-edu-router/d-e-r-p.c-f/blacklists.d" ]] || \
	   [[ -d "/var/lib/debian-edu-router/d-e-r-p.c-f/blacklists_working_dir" ]]; then
		debug_log "Found filterlists at previously used paths, removing them:"
		for old_path in "/var/lib/debian-edu-router/d-e-r-p.c-f/blacklists.d" \
		                "/var/lib/debian-edu-router/d-e-r-p.c-f/blacklists_working_dir"; do
			if [[ -d "${old_path}" ]]; then
				debug_log "  Deleting ${old_path}"
				rm -Rf "${old_path}"
			fi
		done
	fi

	if [[ -d "${old_selection_dir}" ]]; then
		debug_log "Found filterlists data at previously used paths, migrating them:"
		if [[ -e "${SELECTED_CATEGORIES_DIR}" ]]; then
			# 'mv' would nest the old directory inside the existing one.
			debug_log "  ${SELECTED_CATEGORIES_DIR} already exists, leaving ${old_selection_dir} untouched"
		else
			debug_log "  Moving ${old_selection_dir} -> ${SELECTED_CATEGORIES_DIR}"
			# The parent directory is not guaranteed to exist at this point.
			mkdir -p "$(dirname "${SELECTED_CATEGORIES_DIR}")"
			mv "${old_selection_dir}" "${SELECTED_CATEGORIES_DIR}"
		fi
	fi

}

# Do not let Squid-ACL-watcher reload all instances of Squid again and again.
# The marker is removed again below, or by the EXIT trap on early exits.
printf '%s\n' "$(LANG=C.UTF-8 date)" > "${FILTERLISTS_PATH}/work-in-progress"

parse_arguments "$@"

if [[ "${REMOVE_FILTERLISTS}" == "true" ]]; then
	notice_log "Removing all filterlist files related to e2guardian-based domain and URL filtering!"
fi

# Enabling the filterlists means: migrate old data and create WORK_DIR.
if [[ "${ENABLE_FILTERLISTS}" == "true" ]]; then
	migrate_data
	mkdir -p "${WORK_DIR}/"
fi

# The existence of WORK_DIR is the "filterlists are enabled" switch.
if [[ ! -d "${WORK_DIR}" ]]; then
	notice_log "refresh-blacklists_e2guardian: e2guardian-based filterlists are disabled. Skipping. (No WORK_DIR found)"
	exit 0
fi

download_blacklists
update_blacklists
update_whitelists
init_config
rearrange_lists

# One entry per e2guardian list: "<action> <type> <list>".
for list_spec in \
	"blacklist domains bannedsitelist" \
	"blacklist urls bannedurllist" \
	"blacklist expressions bannedregexpurllist" \
	"whitelist domains exceptionsitelist" \
	"whitelist urls exceptionurllist"; do
	read -r spec_action spec_type spec_list <<< "${list_spec}"
	update_lists "${spec_action}" "${spec_type}" "${spec_list}"
done

if [[ "${REMOVE_FILTERLISTS}" == "true" ]]; then
	if [[ "${PURGE_FILTERLISTS}" == "true" ]]; then
		rmdir "${LISTS_DIR}"               || true
		rmdir "${WORK_DIR}"                || true

		notice_log "Purged all filterlist files..."
	else
		# Filterlists download via loginmenu will only happen if WORK_DIR is present.
		# So LISTS_DIR and WORK_DIR are kept here (no rmdir).

		notice_log "Removed all filterlist files..."
	fi
	exit 0
fi

# Squid may be reloaded now too :)
rm -f "${FILTERLISTS_PATH}/work-in-progress"

if [[ "${NO_SERVICE_RESTARTS}" != "true" ]]; then
	manage_unit restart e2guardian_d-e-r.service
	notice_log "Reloaded e2guardian_d-e-r.service."
fi
