Python Unittest Example

Some time ago, I applied for a software QA position and was given this assignment:

Problem

Given 2 input files with rows in the format:

<account ID><billing ID><Sign-up Date in MM-DD-YYYY format>

Write a Python script to print:

  • All accounts in the first file not in the second
  • All accounts in the second file not in the first
  • All accounts in both files with inconsistent data
  • Incomplete rows
  • All accounts in both with consistent data

Solution

Let us assume the first data file is as follows:

//one<account_id><billing_id><sign_up_date>

//IDs in first file not in second
<01> <05> <01-01-2015>
<02> <04> <01-02-2015>
<03> <03> <01-03-2015>
<04> <02> <01-04-2015>
<05> <01> <01-05-2015>

//IDs in both consistent (same account_id, billing_id, date)
<10> <10> <10-10-2010>
<11> <11> <11-11-2011>
<12> <12> <12-12-2012>

//IDs in both inconsistent (either billing id or date is different)
<13> <13> <07-07-2007>
<14> <14> <03-03-2003>

//Incomplete rows in first
<01> <01-05-2015>
<05-05-2010>
<07> <08>
<09>

and the second data file is as follows:


//two<account_id><billing_id><sign_up_date>

//IDs in second file not in first
<06> <10> <01-06-2015>
<07> <09> <01-07-2015>
<08> <08> <01-08-2015>
<09> <07> <01-09-2015>
<10> <06> <01-10-2015>

//IDs in both consistent (same account_id, billing_id, date)
<10> <10> <10-10-2010>
<11> <11> <11-11-2011>
<12> <12> <12-12-2012>

//IDs in both inconsistent (either billing id or date is different)
<13> <13> <08-08-2008>
<14> <10> <03-03-2003>

//Incomplete rows in second
<02> <02-06-2016>
<06-06-2011>
<08> <09>
<10>

Here is the Python code:


# Regular expressions
import re

# Unit testing
import unittest

# A class for a data file loaded from disk
class Accounts:
   """Account records parsed from a single data file.

   Each complete row has the form ``<account ID> <billing ID> <sign-up date>``.
   After ``loadData()`` has run:

   * ``data`` maps an account ID to the list ``[billing ID, sign-up date]``;
     if an account ID occurs more than once, the last row wins
   * ``incomplete`` holds the token list of every non-comment row that does
     not have exactly three tokens
   * ``comments`` holds every line whose first token starts with ``//``,
     with its tokens joined by single spaces
   """

   # Token separators: '<', '>', newline and space.
   # Compiled once instead of being re-parsed for every line.
   _SEPARATORS = re.compile(r'<|>|\n| ')

   def __init__(self, fileName):
      """Remember the file name; nothing is read until loadData() is called."""
      # Store the contents of the file in a hash table for fast lookup
      # Account ID is the key
      # Billing ID and Signup date as a list of two items is the value
      self.data = {}

      # Rows in the file that do not have exactly three tokens
      self.incomplete = []

      # Comment lines in the file
      self.comments = []

      # File to load data from
      self.fileName = fileName

   def loadData(self):
      """Read self.fileName and sort every line into data/incomplete/comments.

      Raises whatever ``open`` raises if the file cannot be opened.
      """
      with open(self.fileName, 'r') as f:
         for line in f:
            # Split the line into tokens and drop the empty ones.
            # A real list is built on purpose: on Python 3 filter() returns
            # a lazy iterator, which supports neither len() nor indexing.
            tokens = [tok for tok in self._SEPARATORS.split(line) if tok]

            # Skip empty lines
            if not tokens:
               continue

            # Detect comment lines
            if tokens[0].startswith('//'):
               self.comments.append(" ".join(tokens))
            # Add lines containing valid account information
            elif len(tokens) == 3:
               self.data[tokens[0]] = tokens[1:]
            # Any other token count (fewer or more than 3) is not a valid row
            else:
               self.incomplete.append(tokens)

   def rowsNotIn(self, acc):
      """Return the account IDs present in this file but absent from acc."""
      return [key for key in self.data if key not in acc.data]

   def conistentRowsCommonWith(self, acc):
      """Return the account IDs present in both files with identical data.

      The misspelled method name is kept because existing callers use it.
      """
      return [key for key, value in self.data.items()
              if key in acc.data and value == acc.data[key]]

   # Correctly spelled alias for conistentRowsCommonWith
   consistentRowsCommonWith = conistentRowsCommonWith

   def inconsistentRowsCommonWith(self, acc):
      """Return the account IDs present in both files whose data differs."""
      return [key for key, value in self.data.items()
              if key in acc.data and value != acc.data[key]]

   def incompleteRows(self):
      """Return the token lists of all rows that were not valid records."""
      return self.incomplete
	
# Test cases
class Tests(unittest.TestCase):
   """Checks the Accounts comparisons against the two sample data files.

   The tests read 'data_01.txt' and 'data_02.txt' relative to the current
   working directory.
   """

   def setUp(self):
      # First file
      self.a = Accounts('data_01.txt')
      # Second file
      self.b = Accounts('data_02.txt')

      # Load files from disk
      self.a.loadData()
      self.b.loadData()

   def assertSameItems(self, actual, expected):
      """Assert both sequences hold the same items, in any order.

      unittest calls this check assertItemsEqual on Python 2.7 and
      assertCountEqual on Python 3; use whichever this interpreter has.
      """
      check = getattr(self, 'assertCountEqual', None)
      if check is None:
         check = self.assertItemsEqual
      check(actual, expected)

   # Find all accounts in first file not in the second
   def test_01_rowsInOneNotInTwo(self):
      self.assertSameItems(self.a.rowsNotIn(self.b),
                           ['01', '02', '03', '04', '05'])

   # Find all accounts in second file not in first
   def test_02_rowsInTwoNotInOne(self):
      self.assertSameItems(self.b.rowsNotIn(self.a),
                           ['06', '07', '08', '09'])

   # Find accounts in both files having consistent data
   def test_03_conistentRows(self):
      self.assertSameItems(self.a.conistentRowsCommonWith(self.b),
                           ['10', '12', '11'])

   # Find accounts in both files having inconsistent data
   def test_04_inconistentRows(self):
      self.assertSameItems(self.a.inconsistentRowsCommonWith(self.b),
                           ['13', '14'])

   # Find incomplete rows in a data file
   # (despite its name, this test covers incomplete rows; the name is kept
   # so the test can still be selected by its existing identifier)
   def test_05_inconistentRows(self):
      self.assertSameItems(
         self.a.incompleteRows(),
         [['01', '01-05-2015'], ['05-05-2010'], ['07', '08'], ['09']])
      self.assertSameItems(
         self.b.incompleteRows(),
         [['02', '02-06-2016'], ['06-06-2011'], ['08', '09'], ['10']])

# Run the test cases only when this file is executed as a script, so that
# importing the module does not start a test run and exit the interpreter
if __name__ == '__main__':
   unittest.main()

Leave a Reply

%d bloggers like this: