Project

General

Profile

Actions

Bug #52426

open

mgr/predict: Can not predict SAS devices

Added by Liyan Wang over 2 years ago.

Status:
New
Priority:
Normal
Assignee:
-
Category:
ceph-mgr
Target version:
-
% Done:

0%

Source:
Tags:
Backport:
Regression:
No
Severity:
2 - major
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):

Description

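Only SATA devices have the "ata_smart_attributes" key in their smartctl JSON output; SAS devices do not have this key.
The code in mgr/diskprediction_local/module.py, _predict_life_expentancy(), needs the "ata_smart_attributes" key, so for SAS devices the attribute loop below finds nothing to pass to the predictor: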

        if len(health_data) >= 6:
            o_keys = sorted(health_data.keys(), reverse=True)
            for o_key in o_keys:
                # get values for current day (?)
                dev_smart = {}
                s_val = health_data[o_key]

                # add all smart attributes
                ata_smart = s_val.get('ata_smart_attributes', {})
                for attr in ata_smart.get('table', []):    =========> Get the information for predictor
                    # get raw smart values
                    if attr.get('raw', {}).get('string') is not None:
                        if str(attr.get('raw', {}).get('string', '0')).isdigit():
                            dev_smart['smart_%s_raw' % attr.get('id')] = \
                                int(attr.get('raw', {}).get('string', '0'))
                        else:
                            if str(attr.get('raw', {}).get('string', '0')).split(' ')[0].isdigit():
                                dev_smart['smart_%s_raw' % attr.get('id')] = \
                                    int(attr.get('raw', {}).get('string',
                                                                '0').split(' ')[0])
                            else:
                                dev_smart['smart_%s_raw' % attr.get('id')] = \
                                    attr.get('raw', {}).get('value', 0)
                    # get normalized smart values
                    if attr.get('value') is not None:
                        dev_smart['smart_%s_normalized' % attr.get('id')] = \
                            attr.get('value')
                # add power on hours manually if not available in smart attributes
                power_on_time = s_val.get('power_on_time', {}).get('hours')
                if power_on_time is not None:
                    dev_smart['smart_9_raw'] = int(power_on_time)

SATA device:

smartctl -a /dev/sdaa -j
{
  "json_format_version": [
    1,
    0
  ],
  "smartctl": {
    "version": [
      7,
      0
    ],
    "svn_revision": "4883",
    "platform_info": "x86_64-linux-3.10.0-514.26.2.el7.x86_64",
    "build_info": "(local build)",
    "argv": [
      "smartctl",
      "-a",
      "/dev/sdaa",
      "-j" 
    ],
    "exit_status": 0
  },
  "device": {
    "name": "/dev/sdaa",
    "info_name": "/dev/sdaa [SAT]",
    "type": "sat",
    "protocol": "ATA" 
  },
  "model_name": "ST8000NM000A-2KE101",
  "serial_number": "WKD14TR8",
  "wwn": {
    "naa": 5,
    "oui": 3152,
    "id": 3482901268
  },
  "firmware_version": "SN03",
  "user_capacity": {
    "blocks": 15628053168,
    "bytes": 8001563222016
  },
  "logical_block_size": 512,
  "physical_block_size": 4096,
  "rotation_rate": 7200,
  "form_factor": {
    "ata_value": 2,
    "name": "3.5 inches" 
  },
  "in_smartctl_database": false,
  "ata_version": {
    "string": "ACS-4 (minor revision not indicated)",
    "major_value": 4064,
    "minor_value": 65535
  },
  "sata_version": {
    "string": "SATA 3.3",
    "value": 511
  },
  "interface_speed": {
    "max": {
      "sata_value": 14,
      "string": "6.0 Gb/s",
      "units_per_second": 60,
      "bits_per_unit": 100000000
    },
    "current": {
      "sata_value": 3,
      "string": "6.0 Gb/s",
      "units_per_second": 60,
      "bits_per_unit": 100000000
    }
  },
  "local_time": {
    "time_t": 1630031490,
    "asctime": "Fri Aug 27 10:31:30 2021 CST" 
  },
  "smart_status": {
    "passed": true
  },
  "ata_smart_data": {  =========>This is the key
    "offline_data_collection": {
      "status": {
        "value": 130,
        "string": "was completed without error",
        "passed": true
      },
      "completion_seconds": 567
    },
    "self_test": {
      "status": {
        "value": 0,
        "string": "completed without error",
        "passed": true
      },
      "polling_minutes": {
        "short": 1,
        "extended": 730,
        "conveyance": 2
      }
    },
    "capabilities": {
      "values": [
        123,
        3
      ],
      "exec_offline_immediate_supported": true,
      "offline_is_aborted_upon_new_cmd": false,
      "offline_surface_scan_supported": true,
      "self_tests_supported": true,
      "conveyance_self_test_supported": true,
      "selective_self_test_supported": true,
      "attribute_autosave_enabled": true,
      "error_logging_supported": true,
      "gp_logging_supported": true
    }
  },
  "ata_sct_capabilities": {
    "value": 28861,
    "error_recovery_control_supported": true,
    "feature_control_supported": true,
    "data_table_supported": true
  },
  "ata_smart_attributes": {
    "revision": 10,
    "table": [
      {
        "id": 1,
        "name": "Raw_Read_Error_Rate",
        "value": 79,
        "worst": 64,
        "thresh": 44,
        "when_failed": "",
        "flags": {
          "value": 15,
          "string": "POSR-- ",
          "prefailure": true,
          "updated_online": true,
          "performance": true,
          "error_rate": true,
          "event_count": false,
          "auto_keep": false
        },
        "raw": {
          "value": 85892432,
          "string": "85892432" 
        }
      },
      {
        "id": 3,
        "name": "Spin_Up_Time",
        "value": 90,
        "worst": 90,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 3,
          "string": "PO---- ",
          "prefailure": true,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": false,
          "auto_keep": false
        },
        "raw": {
          "value": 0,
          "string": "0" 
        }
      },
      {
        "id": 4,
        "name": "Start_Stop_Count",
        "value": 100,
        "worst": 100,
        "thresh": 20,
        "when_failed": "",
        "flags": {
          "value": 50,
          "string": "-O--CK ",
          "prefailure": false,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": true,
          "auto_keep": true
        },
        "raw": {
          "value": 44,
          "string": "44" 
        }
      },
      {
        "id": 5,
        "name": "Reallocated_Sector_Ct",
        "value": 100,
        "worst": 100,
        "thresh": 10,
        "when_failed": "",
        "flags": {
          "value": 51,
          "string": "PO--CK ",
          "prefailure": true,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": true,
          "auto_keep": true
        },
        "raw": {
          "value": 0,
          "string": "0" 
        }
      },
      {
        "id": 7,
        "name": "Seek_Error_Rate",
        "value": 80,
        "worst": 60,
        "thresh": 45,
        "when_failed": "",
        "flags": {
          "value": 15,
          "string": "POSR-- ",
          "prefailure": true,
          "updated_online": true,
          "performance": true,
          "error_rate": true,
          "event_count": false,
          "auto_keep": false
        },
        "raw": {
          "value": 105714286,
          "string": "105714286" 
        }
      },
      {
        "id": 9,
        "name": "Power_On_Hours",
        "value": 98,
        "worst": 98,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 50,
          "string": "-O--CK ",
          "prefailure": false,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": true,
          "auto_keep": true
        },
        "raw": {
          "value": 2350,
          "string": "2350" 
        }
      },
      {
        "id": 10,
        "name": "Spin_Retry_Count",
        "value": 100,
        "worst": 100,
        "thresh": 97,
        "when_failed": "",
        "flags": {
          "value": 19,
          "string": "PO--C- ",
          "prefailure": true,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": true,
          "auto_keep": false
        },
        "raw": {
          "value": 0,
          "string": "0" 
        }
      },
      {
        "id": 12,
        "name": "Power_Cycle_Count",
        "value": 100,
        "worst": 100,
        "thresh": 20,
        "when_failed": "",
        "flags": {
          "value": 50,
          "string": "-O--CK ",
          "prefailure": false,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": true,
          "auto_keep": true
        },
        "raw": {
          "value": 27,
          "string": "27" 
        }
      },
      {
        "id": 18,
        "name": "Unknown_Attribute",
        "value": 100,
        "worst": 100,
        "thresh": 50,
        "when_failed": "",
        "flags": {
          "value": 11,
          "string": "PO-R-- ",
          "prefailure": true,
          "updated_online": true,
          "performance": false,
          "error_rate": true,
          "event_count": false,
          "auto_keep": false
        },
        "raw": {
          "value": 0,
          "string": "0" 
        }
      },
      {
        "id": 187,
        "name": "Reported_Uncorrect",
        "value": 100,
        "worst": 100,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 50,
          "string": "-O--CK ",
          "prefailure": false,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": true,
          "auto_keep": true
        },
        "raw": {
          "value": 0,
          "string": "0" 
        }
      },
      {
        "id": 188,
        "name": "Command_Timeout",
        "value": 100,
        "worst": 99,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 50,
          "string": "-O--CK ",
          "prefailure": false,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": true,
          "auto_keep": true
        },
        "raw": {
          "value": 4295032833,
          "string": "4295032833" 
        }
      },
      {
        "id": 190,
        "name": "Airflow_Temperature_Cel",
        "value": 63,
        "worst": 45,
        "thresh": 40,
        "when_failed": "",
        "flags": {
          "value": 34,
          "string": "-O---K ",
          "prefailure": false,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": false,
          "auto_keep": true
        },
        "raw": {
          "value": 689438757,
          "string": "37 (Min/Max 24/41)" 
        }
      },
      {
        "id": 192,
        "name": "Power-Off_Retract_Count",
        "value": 100,
        "worst": 100,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 50,
          "string": "-O--CK ",
          "prefailure": false,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": true,
          "auto_keep": true
        },
        "raw": {
          "value": 12,
          "string": "12" 
        }
      },
      {
        "id": 193,
        "name": "Load_Cycle_Count",
        "value": 100,
        "worst": 100,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 50,
          "string": "-O--CK ",
          "prefailure": false,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": true,
          "auto_keep": true
        },
        "raw": {
          "value": 122,
          "string": "122" 
        }
      },
      {
        "id": 194,
        "name": "Temperature_Celsius",
        "value": 37,
        "worst": 48,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 34,
          "string": "-O---K ",
          "prefailure": false,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": false,
          "auto_keep": true
        },
        "raw": {
          "value": 47244640293,
          "string": "37 (0 11 0 0 0)" 
        }
      },
      {
        "id": 195,
        "name": "Hardware_ECC_Recovered",
        "value": 3,
        "worst": 1,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 26,
          "string": "-O-RC- ",
          "prefailure": false,
          "updated_online": true,
          "performance": false,
          "error_rate": true,
          "event_count": true,
          "auto_keep": false
        },
        "raw": {
          "value": 85892432,
          "string": "85892432" 
        }
      },
      {
        "id": 197,
        "name": "Current_Pending_Sector",
        "value": 100,
        "worst": 100,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 18,
          "string": "-O--C- ",
          "prefailure": false,
          "updated_online": true,
          "performance": false,
          "error_rate": false,
          "event_count": true,
          "auto_keep": false
        },
        "raw": {
          "value": 0,
          "string": "0" 
        }
      },
      {
        "id": 198,
        "name": "Offline_Uncorrectable",
        "value": 100,
        "worst": 100,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 16,
          "string": "----C- ",
          "prefailure": false,
          "updated_online": false,
          "performance": false,
          "error_rate": false,
          "event_count": true,
          "auto_keep": false
        },
        "raw": {
          "value": 0,
          "string": "0" 
        }
      },
      {
        "id": 199,
        "name": "UDMA_CRC_Error_Count",
        "value": 200,
        "worst": 200,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 62,
          "string": "-OSRCK ",
          "prefailure": false,
          "updated_online": true,
          "performance": true,
          "error_rate": true,
          "event_count": true,
          "auto_keep": true
        },
        "raw": {
          "value": 0,
          "string": "0" 
        }
      },
      {
        "id": 240,
        "name": "Head_Flying_Hours",
        "value": 100,
        "worst": 253,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 0,
          "string": "------ ",
          "prefailure": false,
          "updated_online": false,
          "performance": false,
          "error_rate": false,
          "event_count": false,
          "auto_keep": false
        },
        "raw": {
          "value": 64969970288200,
          "string": "1608 (59 23 0)" 
        }
      },
      {
        "id": 241,
        "name": "Total_LBAs_Written",
        "value": 100,
        "worst": 253,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 0,
          "string": "------ ",
          "prefailure": false,
          "updated_online": false,
          "performance": false,
          "error_rate": false,
          "event_count": false,
          "auto_keep": false
        },
        "raw": {
          "value": 21473887655,
          "string": "21473887655" 
        }
      },
      {
        "id": 242,
        "name": "Total_LBAs_Read",
        "value": 100,
        "worst": 253,
        "thresh": 0,
        "when_failed": "",
        "flags": {
          "value": 0,
          "string": "------ ",
          "prefailure": false,
          "updated_online": false,
          "performance": false,
          "error_rate": false,
          "event_count": false,
          "auto_keep": false
        },
        "raw": {
          "value": 3328754171,
          "string": "3328754171" 
        }
      }
    ]
  },
  "power_on_time": {
    "hours": 2350
  },
  "power_cycle_count": 27,
  "temperature": {
    "current": 37
  },
  "ata_smart_error_log": {
    "summary": {
      "revision": 1,
      "count": 0
    }
  },
  "ata_smart_self_test_log": {
    "standard": {
      "revision": 1,
      "table": [
        {
          "type": {
            "value": 129,
            "string": "Short captive" 
          },
          "status": {
            "value": 0,
            "string": "Completed without error",
            "passed": true
          },
          "lifetime_hours": 504
        },
        {
          "type": {
            "value": 129,
            "string": "Short captive" 
          },
          "status": {
            "value": 0,
            "string": "Completed without error",
            "passed": true
          },
          "lifetime_hours": 503
        },
        {
          "type": {
            "value": 1,
            "string": "Short offline" 
          },
          "status": {
            "value": 0,
            "string": "Completed without error",
            "passed": true
          },
          "lifetime_hours": 0
        },
        {
          "type": {
            "value": 1,
            "string": "Short offline" 
          },
          "status": {
            "value": 0,
            "string": "Completed without error",
            "passed": true
          },
          "lifetime_hours": 0
        }
      ],
      "count": 4,
      "error_count_total": 0,
      "error_count_outdated": 0
    }
  },
  "ata_smart_selective_self_test_log": {
    "revision": 1,
    "table": [
      {
        "lba_min": 0,
        "lba_max": 0,
        "status": {
          "value": 0,
          "string": "Not_testing" 
        }
      },
      {
        "lba_min": 0,
        "lba_max": 0,
        "status": {
          "value": 0,
          "string": "Not_testing" 
        }
      },
      {
        "lba_min": 0,
        "lba_max": 0,
        "status": {
          "value": 0,
          "string": "Not_testing" 
        }
      },
      {
        "lba_min": 0,
        "lba_max": 0,
        "status": {
          "value": 0,
          "string": "Not_testing" 
        }
      },
      {
        "lba_min": 0,
        "lba_max": 0,
        "status": {
          "value": 0,
          "string": "Not_testing" 
        }
      }
    ],
    "flags": {
      "value": 0,
      "remainder_scan_enabled": false
    },
    "power_up_scan_resume_minutes": 0
  }
}

SAS device (no "ata_smart_attributes" key in the output):

smartctl -a /dev/sdah -j
{
  "json_format_version": [
    1,
    0
  ],
  "smartctl": {
    "version": [
      7,
      0
    ],
    "svn_revision": "4883",
    "platform_info": "x86_64-linux-3.10.0-514.26.2.el7.x86_64",
    "build_info": "(local build)",
    "argv": [
      "smartctl",
      "-a",
      "/dev/sdah",
      "-j" 
    ],
    "exit_status": 0
  },
  "device": {
    "name": "/dev/sdah",
    "info_name": "/dev/sdah",
    "type": "scsi",
    "protocol": "SCSI" 
  },
  "vendor": "HGST",
  "product": "HUS726060AL5210",
  "model_name": "HGST HUS726060AL5210",
  "revision": "A907",
  "scsi_version": "SPC-4",
  "user_capacity": {
    "blocks": 11721045168,
    "bytes": 6001175126016
  },
  "logical_block_size": 512,
  "physical_block_size": 4096,
  "rotation_rate": 7200,
  "form_factor": {
    "scsi_value": 2,
    "name": "3.5 inches" 
  },
  "serial_number": "NCHPB2YZ",
  "device_type": {
    "scsi_value": 0,
    "name": "disk" 
  },
  "local_time": {
    "time_t": 1630032117,
    "asctime": "Fri Aug 27 10:41:57 2021 CST" 
  },
  "smart_status": {
    "passed": true
  },
  "temperature": {
    "current": 40,
    "drive_trip": 85
  },
  "scsi_grown_defect_list": 0,
  "scsi_error_counter_log": {
    "read": {
      "errors_corrected_by_eccfast": 0,
      "errors_corrected_by_eccdelayed": 15,
      "errors_corrected_by_rereads_rewrites": 0,
      "total_errors_corrected": 15,
      "correction_algorithm_invocations": 1781830,
      "gigabytes_processed": "38534.502",
      "total_uncorrected_errors": 0
    },
    "write": {
      "errors_corrected_by_eccfast": 0,
      "errors_corrected_by_eccdelayed": 0,
      "errors_corrected_by_rereads_rewrites": 0,
      "total_errors_corrected": 0,
      "correction_algorithm_invocations": 14359899,
      "gigabytes_processed": "65789.097",
      "total_uncorrected_errors": 0
    },
    "verify": {
      "errors_corrected_by_eccfast": 0,
      "errors_corrected_by_eccdelayed": 0,
      "errors_corrected_by_rereads_rewrites": 0,
      "total_errors_corrected": 0,
      "correction_algorithm_invocations": 454085,
      "gigabytes_processed": "0.000",
      "total_uncorrected_errors": 0
    }
  }
}

No data to display

Actions

Also available in: Atom PDF