Pricing data checker script

I was recently given a task to implement a data-checker script which given an input file containing (date, last_price) values should check for 3 different kind of errors – missing values, stale values and outliers and returns list of errors. I wrote the code below and the feedback I got was that it was not “pythonic” enough. Can someone please let me know how I can make this more pythonic? What parts look good, what don’t? Will be extremely helpful as I write more python code in future.

#!/usr/bin/python3.6  import sys import csv import pprint import statistics from datetime import date  class DataChecker:     class DatePrice:         def __init__(self, input_date, price):             try:                 day, month, year = input_date.split("/")                 self._price_date = date(int(year), int(month), int(day))             except ValueError:                 # Don't tolerate invalid date                 raise              try:                 self._price = float(price)             except (TypeError, ValueError):                 self._price = 0          @property         def date(self):             return self._price_date.strftime("%d/%m/%Y")          @property         def date_obj(self):             return self._price_date          @property         def price(self):             return self._price          def __repr__(self):             return f"{self.date}, {self.price}"      def __init__(self, input_date_price_values):         self._date_price_values = []         for date, price in input_date_price_values:             try:                 self._date_price_values.append(self.DatePrice(date, price))             except ValueError:                 pass          self._date_price_values.sort(key=lambda x: x.date_obj)          self._stale_price_dict = {}          self._outlier_low, self._outlier_high = self._calculate_outlier_thresholds_using_iqr(             self._date_price_values         )      def check_for_errors(self):         """         returns -> List[tuple(date, float, error)]          errors = 'missing value', 'stale value' or 'outlier'          Uses 3 different error checkers to check for errors in data         1. Checks for missing values in data -> categorises missing values as any value == 0, or empty string or nulls         2. Checks for stale values in data -> categorises stale values as any value that remains unchanged              for 5 business days. For stale values it returns the last date on which it was repeated         3. Checks for outlier values in data -> Uses Interquartile range (IQR) and a low threshold of              first-quartile-value - 1.2 x IQR and high-threshold of third-quartile-value + 1.2 x IQR.              Any values outside this range are deemed as outliers         """         errors = []         for datePrice in self._date_price_values:             if self._is_value_missing(datePrice.price):                 self._add_to_errors(datePrice, "missing value", errors)             elif self._is_value_stale(datePrice.price):                 self._add_to_errors(datePrice, "stale value", errors)             elif self._is_value_outlier(datePrice.price):                 self._add_to_errors(datePrice, "outlier", errors)             else:                 continue          return errors      def _add_to_errors(self, datePrice, error_string, errors):         error_tuple = (datePrice.date, datePrice.price, error_string)         errors.append(error_tuple)      def _is_value_missing(self, price):         if price is None or price == 0:             return True         return False      def _is_value_stale(self, price):         if price in self._stale_price_dict:             self._stale_price_dict[price] += 1             if self._stale_price_dict[price] >= 5:  # 5 business days in week                 return True         else:             self._stale_price_dict.clear()             self._stale_price_dict[price] = 1         return False      def _is_value_outlier(self, price):         if price < self._outlier_low or price > self._outlier_high:             return True         return False      def _calculate_outlier_thresholds_using_iqr(self, data_price_values):         price_values = sorted([dataPrice.price for dataPrice in data_price_values])          median_index = len(price_values) // 2         first_quartile = statistics.median(price_values[:median_index])         third_quartile = statistics.median(price_values[median_index + 1 :])          iqr = third_quartile - first_quartile          low_iqr = first_quartile - 1.2 * iqr         high_iqr = third_quartile + 1.2 * iqr          return low_iqr, high_iqr      def _calculate_outlier_thresholds_using_mean_deviation(self, data_price_values):         price_values = sorted([dataPrice.price for dataPrice in data_price_values])          mean_value = statistics.mean(price_values)         std_dev = statistics.stdev(price_values)          low_iqr = mean_value - 2 * std_dev         high_iqr = mean_value + 2 * std_dev          return low_iqr, high_iqr   def check_file_data(file_path):     with open(file_path) as data_file:         raw_data = csv.DictReader(data_file)         input_data = []         for row in raw_data:             input_data.append((row["Date"], row["Last Price"]))          data_checker = DataChecker(input_data)         errors = data_checker.check_for_errors()          pp = pprint.PrettyPrinter(indent=4)         pp.pprint(errors)         print(f"Total Errors Found: {len(errors)}")          return errors   if __name__ == "__main__":     if len(sys.argv) < 2:         print("Please provide filepath")         sys.exit()      file_path = sys.argv[1]     check_file_data(file_path) 

To test put the above code in a file called “data_checker.py” and test code below in a file called “test_data_checker.py” in the same directory and run:

python3.6 -m pytest -v test_data_checker.py

import pytest  from data_checker import DataChecker  test_data = [     (         pytest.param(             [("01/02/2010", "10"), ("02/02/2010", "10.09"), ("03/02/2010", "10.12")],             [],             id="no-errors-in-data",         )     ),     (         pytest.param(             [("01/02/2010", "0.0"), ("02/02/2010", ""), ("03/02/2010", "10.12")],             [("01/02/2010", 0, "missing value"), ("02/02/2010", 0, "missing value")],             id="2-zero-values",         )     ),     (         pytest.param(             [                 ("01/02/2010", "2"),                 ("02/02/2010", "1.12"),                 ("03/02/2010", "1.12"),                 ("04/02/2010", "1.12"),                 ("05/02/2010", "1.12"),                 ("06/02/2010", "1.11"),             ],             [],             id="4-repeated-values-no-stale",         )     ),     (         pytest.param(             [                 ("01/02/2010", "1.10"),                 ("02/02/2010", "1.12"),                 ("03/02/2010", "1.12"),                 ("04/02/2010", "1.12"),                 ("05/02/2010", "1.12"),                 ("06/02/2010", "1.12"),                 ("07/02/2010", "1.11"),             ],             [("06/02/2010", 1.12, "stale value")],             id="1-stale-value",         )     ),     (         pytest.param(             [                 ("01/02/2010", "0"),                 ("02/02/2010", "1.12"),                 ("03/02/2010", "1.12"),                 ("04/02/2010", "1.12"),                 ("05/02/2010", "1.12"),                 ("06/02/2010", "1.12"),                 ("07/02/2010", "1.11"),             ],             [("01/02/2010", 0, "missing value"), ("06/02/2010", 1.12, "stale value")],             id="1-missing-1-stale-value",         )     ),     (         pytest.param(             [                 ("01/02/2010", "1.11"),                 ("02/02/2010", "5"),                 ("03/02/2010", "1.12"),                 ("04/02/2010", "1.11"),                 ("05/02/2010", "1.12"),                 ("06/02/2010", "1.12"),                 ("07/02/2010", "1.11"),             ],             [("02/02/2010", 5, "outlier")],             id="1-outlier-value",         )     ),     (         pytest.param(             [                 ("01/02/2010", "0"),                 ("02/02/2010", "5"),                 ("03/02/2010", "1.12"),                 ("04/02/2010", "1.11"),                 ("05/02/2010", "1.12"),                 ("06/02/2010", "1.12"),                 ("07/02/2010", "1.12"),                 ("08/02/2010", "1.12"),                 ("09/02/2010", "1.12"),             ],             [                 ("01/02/2010", 0, "missing value"),                 ("02/02/2010", 5, "outlier"),                 ("09/02/2010", 1.12, "stale value"),             ],             id="missing-stale-outlier-value",         )     ), ]   @pytest.mark.parametrize("input_data, expected_value", test_data) def test_check_for_error(input_data, expected_value):     data_checker = DataChecker(input_data)     errors = data_checker.check_for_errors()     assert errors == expected_value