// External Imports
import flow from 'lodash/fp/flow.js';
import get from 'lodash/fp/get.js';
import mapValues from 'lodash/fp/mapValues.js';
import pick from 'lodash/fp/pick.js';
import uniq from 'lodash/uniq.js';
import sanitizeHtml from 'sanitize-html';

// Local Variables
// All content item fields that can contain HTML with script tags.
const contentFields = ['content', 'excerpt'];

// Local Functions
// TODO: remove when USATECH-1712 gets addressed
// Some of our embeds for e.g. instagram do not include the protocol in their URL.
// Sanitize-html considers this to be invalid and fully removes them automatically.
// As a temporary solution, we are patching this here until we can try to address it
// in Wordpress in
// https://trello.com/c/6m99Z7ec/1712-spike-investigation-on-whether-we-could-solve-some-embeds-problems-in-wordpress-instead-of-the-frontend
// The regexp below matches any double-quoted protocol-less URL (typically the value of a src
// attribute, e.g. "//www.instagram.com/embed.js") and prepends https: as the protocol
// The double quotes around the URL are part of the match, so the replacement has to write
// them back: dropping them would leave an unquoted attribute value whose end depends on
// whatever character happens to follow it (e.g. the "/" of a self-closing tag).
const addMissingHttpsProtocolToEmbedsURLs = (content) =>
  content.replaceAll(/"(\/\/(www\.)?([\dA-Za-z-]+\.)+[\dA-Za-z-][^"]*)"/g, '"https:$1"');

// Reduce the collected script frames to the list of distinct, non-empty `src` URLs.
// Falsy frames are skipped, and frames without an `attribs.src` value are dropped.
const getUniqueValidScriptsUrl = (scripts) => {
  const readSrc = get(['attribs', 'src']);
  const sources = scripts.filter(Boolean).map(readSrc);

  return uniq(sources).filter(Boolean);
};

/**
 * Sanitize the HTML fields of a content item and pull every script tag out of them.
 * Only the fields listed in `contentFields` are processed; each removed script tag is
 * collected so that its URL can be reported back to the caller.
 * For more documentation on this see docs/article-content-embeds.md
 *
 * @param {Object} contentItem Content item holding the HTML fields to sanitize.
 * @return {{filteredContent: Object, scripts: string[]}} The sanitized fields, plus the
 *   unique `src` URLs of the script tags that were removed from them.
 */
const filterContentFields = (contentItem) => {
  // Shared across all fields so scripts found in any of them end up in one collection.
  const removedScriptFrames = [];

  // Returning true tells sanitize-html to drop the tag; only script tags are dropped.
  const collectAndRemoveScript = (frame) => {
    const isScript = frame.tag === 'script';

    if (isScript) {
      removedScriptFrames.push(frame);
    }

    return isScript;
  };

  const sanitizeField = (html) => {
    const patchedHtml = addMissingHttpsProtocolToEmbedsURLs(html);

    return sanitizeHtml(patchedHtml, {
      allowedAttributes: {
        '*': ['*'],
        p: ['style'],
      },
      allowedTags: false,
      allowVulnerableTags: true,
      exclusiveFilter: collectAndRemoveScript,
      parseStyleAttributes: false,
    });
  };

  const sanitizeContentFields = flow(pick(contentFields), mapValues(sanitizeField));

  // Sanitizing must run first: it is what fills `removedScriptFrames`.
  const filteredContent = sanitizeContentFields(contentItem);
  const scripts = getUniqueValidScriptsUrl(removedScriptFrames);

  return { filteredContent, scripts };
};

// Module Exports
export { filterContentFields };
