Source code for scrubadub.detectors.credit_card

import re

from .base import RegexDetector
from ..filth import CreditCardFilth
from scrubadub.detectors.catalogue import register_detector


@register_detector
class CreditCardDetector(RegexDetector):
    """Detect credit-card numbers in ``text`` so that they can be removed.

    The pattern covers the number formats of Visa, MasterCard, American
    Express, Diners Club, Discover and JCB.

    A number is only matched when it begins at the very start of the text
    or directly after a whitespace character; digits glued to a preceding
    non-space character are ignored. Numbers written with spaces or dashes
    between the digit groups are not matched (see the TODO below).
    """

    name = 'credit_card'
    filth_cls = CreditCardFilth
    autoload = True

    # Regexes from:
    # http://www.regular-expressions.info/creditcard.html
    # Fake card numbers from:
    # https://www.paypalobjects.com/en_US/vhelp/paypalmanager_help/credit_card_numbers.htm
    # taken from the alphagov fork of scrubadub: https://github.com/alphagov/scrubadub

    # Looking at wikipedia, there are probably more numbers to detect:
    # https://en.wikipedia.org/wiki/Payment_card_number#Issuer_identification_number_.28IIN.29

    # TODO: regex doesn't match if the credit card number has spaces/dashes in

    regex = re.compile((
        # "Not preceded by a non-whitespace character": true at the start of
        # the text as well as after whitespace. A plain (?<=\s) lookbehind
        # would miss a card number located at offset 0.
        r"(?<!\S)"
        r"(?:4[0-9]{12}(?:[0-9]{3})?"  # Visa
        r"|(?:5[1-5][0-9]{2}"  # MasterCard (51-55 range)
        r"|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}"  # MasterCard (2221-2720 range)
        r"|3[47][0-9]{13}"  # American Express
        r"|3(?:0[0-5]|[68][0-9])[0-9]{11}"  # Diners Club
        r"|6(?:011|5[0-9]{2})[0-9]{12}"  # Discover
        r"|(?:2131|1800|35\d{3})\d{11})"  # JCB
    ), re.VERBOSE)