Skip to content
Snippets Groups Projects
replace.py 4.69 KiB
Newer Older
# -*- coding: utf-8 -*-
##############################################################################
#
#    Email marketing click & read tracker
#    Copyright (C) 2011 Florent Aide, <florent.aide@gmail.com>
#    Copyright (C) 2011 XCG Consulting (http://www.xcg-consulting.fr/)
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License as
#    published by the Free Software Foundation, either version 3 of the
#    License, or (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################
__author__ = 'faide'

import uuid
import re

# Patterns used to locate the URLs that get swapped for tracking URLs.
# Group 1 of every pattern is the URL itself.
#
# All three patterns only accept letters, digits and "-_/.?&=" (plus ":" for
# the HTML ones).  A plain-text URL is therefore cut at the first character
# outside that set, and an HTML attribute containing such a character is not
# matched at all.

# Bare URL in a plain-text body.  Both http:// and https:// are recognised so
# that secure links are tracked in text bodies just as they are in HTML ones.
simple_text_url_re = re.compile(r'(https?://[-a-zA-Z0-9_/.?&=]+)')
# Value of a double-quoted href="..." attribute.
html_href_re = re.compile(r'href="([-a-zA-Z0-9_/.?&=:]+)"')
# Value of a double-quoted src attribute written exactly as `img src="..."`
# (src must directly follow `img` and a single space).
html_imgsrc_re = re.compile(r'img src="([-a-zA-Z0-9_/.?&=:]+)"')

def insert_tracker_in_text(text, tracker_base, activity_id):
    """Replace every URL found in a plain-text body with a tracking URL.

    Each match of ``simple_text_url_re`` is swapped for
    ``<tracker_base>/<uuid>``, using a freshly generated uuid4 for every
    occurrence (the same URL appearing twice yields two records).

    :param text: the plain-text body to rewrite.
    :param tracker_base: prefix of the tracking URL; it is joined to the
        uuid with a single ``/``.
    :param activity_id: value stored unchanged under
        ``campaign_activity_id`` in every record.
    :returns: a ``(new_text, records)`` tuple. ``records`` is a list of
        dicts, in match order, each holding ``trackitem_uuid``,
        ``real_url`` and ``campaign_activity_id``.
    """
    records = []

    def track(found):
        # Remember the original URL under a new token, then hand the
        # tracking URL back to re.sub as the replacement text.
        original_url = found.group(1)
        token = str(uuid.uuid4())
        records.append({
            'trackitem_uuid': token,
            'real_url': original_url,
            'campaign_activity_id': activity_id,
        })
        return "%s/%s" % (tracker_base, token)

    rewritten = simple_text_url_re.sub(track, text)
    return rewritten, records

def insert_tracker_in_html(html, tracker_base, activity_id):
    """Replace link and image URLs in an HTML body with tracking URLs.

    Two passes are made over the markup: first every match of
    ``html_href_re``, then every match of ``html_imgsrc_re`` on the result
    of the first pass. Each matched URL is swapped for
    ``<tracker_base>/<uuid>`` with a freshly generated uuid4.

    :param html: the HTML body to rewrite.
    :param tracker_base: prefix of the tracking URL; it is joined to the
        uuid with a single ``/``.
    :param activity_id: value stored unchanged under
        ``campaign_activity_id`` in every record.
    :returns: a ``(new_html, records)`` tuple. ``records`` is a list of
        dicts holding ``trackitem_uuid``, ``real_url`` and
        ``campaign_activity_id``; all href records come first, followed by
        the img src records.
    """
    records = []

    def make_replacer(template):
        # Build a re.sub callback that records the matched URL and renders
        # the attribute again through *template*.
        def replace(found):
            original_url = found.group(1)
            token = str(uuid.uuid4())
            records.append({
                'trackitem_uuid': token,
                'real_url': original_url,
                'campaign_activity_id': activity_id,
            })
            return template % (tracker_base, token)
        return replace

    with_links = html_href_re.sub(make_replacer('href="%s/%s"'), html)
    with_images = html_imgsrc_re.sub(
        make_replacer('img src="%s/%s"'), with_links)
    return with_images, records

if __name__ == '__main__':
    # Self-test, executed only when the module is run as a script.
    # print is always called with one parenthesised argument so the output
    # is the same under Python 2 and the code also parses under Python 3.
    url = "http://some.org/a_3/super-script.php?uid=3&someval=4"
    text = url
    base_html = '<a href="%s"> Some Super Promo </a>'
    html = base_html % url
    img_url = 'http://some.org/b_5/super-img.php?img=45&value=33'
    base_img = '<img src="%s" />'
    imgsrc = base_img % img_url

    def _single_track_uuid(values, expected_url):
        """Check *values* holds exactly one record for *expected_url*.

        Returns the uuid stored in that record.
        """
        assert len(values) == 1, (
            "Values should have one record, not %s" % len(values))

        value = values[0]
        track_uuid = value.get('trackitem_uuid', None)
        assert track_uuid, "Track uuid should not be None"

        real_url = value.get("real_url", None)
        print("Real URL was: %s" % real_url)
        assert real_url == expected_url, (
            "Real URL should have been %s, not %s" % (expected_url, real_url))
        return track_uuid

    print("*" * 35)
    print("Testing text replacement")
    new_text, values = insert_tracker_in_text(text, 'BASE', '1')
    track_uuid = _single_track_uuid(values, url)
    expected_text = '%s/%s' % ('BASE', track_uuid)
    assert new_text == expected_text, "new text should have been %s, not %s" % (
        expected_text, new_text)

    print("*" * 35)
    print("Testing HTML replacement")
    new_html, values = insert_tracker_in_html(html, 'BASE', '1')
    print(new_html)
    track_uuid = _single_track_uuid(values, url)
    expected_html = base_html % ('BASE/' + track_uuid)
    assert new_html == expected_html, "new html should have been %s, not %s" % (
        expected_html, new_html)

    print("Testing img src replacement")
    new_html, values = insert_tracker_in_html(imgsrc, 'BASE', '1')
    print(new_html)
    track_uuid = _single_track_uuid(values, img_url)
    expected_html = base_img % ('BASE/' + track_uuid)
    assert new_html == expected_html, "new html should have been %s, not %s" % (
        expected_html, new_html)