Actions
Bug #52426
openmgr/predict: Cannot predict SAS devices
Status:
New
Priority:
Normal
Assignee:
-
Category:
ceph-mgr
Target version:
-
% Done:
0%
Source:
Tags:
Backport:
Regression:
No
Severity:
2 - major
Reviewed:
Affected Versions:
ceph-qa-suite:
Pull request ID:
Crash signature (v1):
Crash signature (v2):
Description
Only SATA devices have the "ata_smart_attributes" key; SAS devices do not have the key "ata_smart_attributes".
The code in mgr/diskprediction_local/module.py, _predict_life_expentancy(), needs the "ata_smart_attributes" key:
if len(health_data) >= 6:
o_keys = sorted(health_data.keys(), reverse=True)
for o_key in o_keys:
# get values for current day (?)
dev_smart = {}
s_val = health_data[o_key]
# add all smart attributes
ata_smart = s_val.get('ata_smart_attributes', {})
for attr in ata_smart.get('table', []): =========> Get the information for predictor
# get raw smart values
if attr.get('raw', {}).get('string') is not None:
if str(attr.get('raw', {}).get('string', '0')).isdigit():
dev_smart['smart_%s_raw' % attr.get('id')] = \
int(attr.get('raw', {}).get('string', '0'))
else:
if str(attr.get('raw', {}).get('string', '0')).split(' ')[0].isdigit():
dev_smart['smart_%s_raw' % attr.get('id')] = \
int(attr.get('raw', {}).get('string',
'0').split(' ')[0])
else:
dev_smart['smart_%s_raw' % attr.get('id')] = \
attr.get('raw', {}).get('value', 0)
# get normalized smart values
if attr.get('value') is not None:
dev_smart['smart_%s_normalized' % attr.get('id')] = \
attr.get('value')
# add power on hours manually if not available in smart attributes
power_on_time = s_val.get('power_on_time', {}).get('hours')
if power_on_time is not None:
dev_smart['smart_9_raw'] = int(power_on_time)
SATA device:
smartctl -a /dev/sdaa -j
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
0
],
"svn_revision": "4883",
"platform_info": "x86_64-linux-3.10.0-514.26.2.el7.x86_64",
"build_info": "(local build)",
"argv": [
"smartctl",
"-a",
"/dev/sdaa",
"-j"
],
"exit_status": 0
},
"device": {
"name": "/dev/sdaa",
"info_name": "/dev/sdaa [SAT]",
"type": "sat",
"protocol": "ATA"
},
"model_name": "ST8000NM000A-2KE101",
"serial_number": "WKD14TR8",
"wwn": {
"naa": 5,
"oui": 3152,
"id": 3482901268
},
"firmware_version": "SN03",
"user_capacity": {
"blocks": 15628053168,
"bytes": 8001563222016
},
"logical_block_size": 512,
"physical_block_size": 4096,
"rotation_rate": 7200,
"form_factor": {
"ata_value": 2,
"name": "3.5 inches"
},
"in_smartctl_database": false,
"ata_version": {
"string": "ACS-4 (minor revision not indicated)",
"major_value": 4064,
"minor_value": 65535
},
"sata_version": {
"string": "SATA 3.3",
"value": 511
},
"interface_speed": {
"max": {
"sata_value": 14,
"string": "6.0 Gb/s",
"units_per_second": 60,
"bits_per_unit": 100000000
},
"current": {
"sata_value": 3,
"string": "6.0 Gb/s",
"units_per_second": 60,
"bits_per_unit": 100000000
}
},
"local_time": {
"time_t": 1630031490,
"asctime": "Fri Aug 27 10:31:30 2021 CST"
},
"smart_status": {
"passed": true
},
"ata_smart_data": { =========>This is the key
"offline_data_collection": {
"status": {
"value": 130,
"string": "was completed without error",
"passed": true
},
"completion_seconds": 567
},
"self_test": {
"status": {
"value": 0,
"string": "completed without error",
"passed": true
},
"polling_minutes": {
"short": 1,
"extended": 730,
"conveyance": 2
}
},
"capabilities": {
"values": [
123,
3
],
"exec_offline_immediate_supported": true,
"offline_is_aborted_upon_new_cmd": false,
"offline_surface_scan_supported": true,
"self_tests_supported": true,
"conveyance_self_test_supported": true,
"selective_self_test_supported": true,
"attribute_autosave_enabled": true,
"error_logging_supported": true,
"gp_logging_supported": true
}
},
"ata_sct_capabilities": {
"value": 28861,
"error_recovery_control_supported": true,
"feature_control_supported": true,
"data_table_supported": true
},
"ata_smart_attributes": {
"revision": 10,
"table": [
{
"id": 1,
"name": "Raw_Read_Error_Rate",
"value": 79,
"worst": 64,
"thresh": 44,
"when_failed": "",
"flags": {
"value": 15,
"string": "POSR-- ",
"prefailure": true,
"updated_online": true,
"performance": true,
"error_rate": true,
"event_count": false,
"auto_keep": false
},
"raw": {
"value": 85892432,
"string": "85892432"
}
},
{
"id": 3,
"name": "Spin_Up_Time",
"value": 90,
"worst": 90,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 3,
"string": "PO---- ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": false,
"auto_keep": false
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 4,
"name": "Start_Stop_Count",
"value": 100,
"worst": 100,
"thresh": 20,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 44,
"string": "44"
}
},
{
"id": 5,
"name": "Reallocated_Sector_Ct",
"value": 100,
"worst": 100,
"thresh": 10,
"when_failed": "",
"flags": {
"value": 51,
"string": "PO--CK ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 7,
"name": "Seek_Error_Rate",
"value": 80,
"worst": 60,
"thresh": 45,
"when_failed": "",
"flags": {
"value": 15,
"string": "POSR-- ",
"prefailure": true,
"updated_online": true,
"performance": true,
"error_rate": true,
"event_count": false,
"auto_keep": false
},
"raw": {
"value": 105714286,
"string": "105714286"
}
},
{
"id": 9,
"name": "Power_On_Hours",
"value": 98,
"worst": 98,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 2350,
"string": "2350"
}
},
{
"id": 10,
"name": "Spin_Retry_Count",
"value": 100,
"worst": 100,
"thresh": 97,
"when_failed": "",
"flags": {
"value": 19,
"string": "PO--C- ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": false
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 12,
"name": "Power_Cycle_Count",
"value": 100,
"worst": 100,
"thresh": 20,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 27,
"string": "27"
}
},
{
"id": 18,
"name": "Unknown_Attribute",
"value": 100,
"worst": 100,
"thresh": 50,
"when_failed": "",
"flags": {
"value": 11,
"string": "PO-R-- ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": true,
"event_count": false,
"auto_keep": false
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 187,
"name": "Reported_Uncorrect",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 188,
"name": "Command_Timeout",
"value": 100,
"worst": 99,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 4295032833,
"string": "4295032833"
}
},
{
"id": 190,
"name": "Airflow_Temperature_Cel",
"value": 63,
"worst": 45,
"thresh": 40,
"when_failed": "",
"flags": {
"value": 34,
"string": "-O---K ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": false,
"auto_keep": true
},
"raw": {
"value": 689438757,
"string": "37 (Min/Max 24/41)"
}
},
{
"id": 192,
"name": "Power-Off_Retract_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 12,
"string": "12"
}
},
{
"id": 193,
"name": "Load_Cycle_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 122,
"string": "122"
}
},
{
"id": 194,
"name": "Temperature_Celsius",
"value": 37,
"worst": 48,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 34,
"string": "-O---K ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": false,
"auto_keep": true
},
"raw": {
"value": 47244640293,
"string": "37 (0 11 0 0 0)"
}
},
{
"id": 195,
"name": "Hardware_ECC_Recovered",
"value": 3,
"worst": 1,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 26,
"string": "-O-RC- ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": true,
"event_count": true,
"auto_keep": false
},
"raw": {
"value": 85892432,
"string": "85892432"
}
},
{
"id": 197,
"name": "Current_Pending_Sector",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 18,
"string": "-O--C- ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": false
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 198,
"name": "Offline_Uncorrectable",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 16,
"string": "----C- ",
"prefailure": false,
"updated_online": false,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": false
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 199,
"name": "UDMA_CRC_Error_Count",
"value": 200,
"worst": 200,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 62,
"string": "-OSRCK ",
"prefailure": false,
"updated_online": true,
"performance": true,
"error_rate": true,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 240,
"name": "Head_Flying_Hours",
"value": 100,
"worst": 253,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 0,
"string": "------ ",
"prefailure": false,
"updated_online": false,
"performance": false,
"error_rate": false,
"event_count": false,
"auto_keep": false
},
"raw": {
"value": 64969970288200,
"string": "1608 (59 23 0)"
}
},
{
"id": 241,
"name": "Total_LBAs_Written",
"value": 100,
"worst": 253,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 0,
"string": "------ ",
"prefailure": false,
"updated_online": false,
"performance": false,
"error_rate": false,
"event_count": false,
"auto_keep": false
},
"raw": {
"value": 21473887655,
"string": "21473887655"
}
},
{
"id": 242,
"name": "Total_LBAs_Read",
"value": 100,
"worst": 253,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 0,
"string": "------ ",
"prefailure": false,
"updated_online": false,
"performance": false,
"error_rate": false,
"event_count": false,
"auto_keep": false
},
"raw": {
"value": 3328754171,
"string": "3328754171"
}
}
]
},
"power_on_time": {
"hours": 2350
},
"power_cycle_count": 27,
"temperature": {
"current": 37
},
"ata_smart_error_log": {
"summary": {
"revision": 1,
"count": 0
}
},
"ata_smart_self_test_log": {
"standard": {
"revision": 1,
"table": [
{
"type": {
"value": 129,
"string": "Short captive"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 504
},
{
"type": {
"value": 129,
"string": "Short captive"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 503
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 0
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 0
}
],
"count": 4,
"error_count_total": 0,
"error_count_outdated": 0
}
},
"ata_smart_selective_self_test_log": {
"revision": 1,
"table": [
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
}
],
"flags": {
"value": 0,
"remainder_scan_enabled": false
},
"power_up_scan_resume_minutes": 0
}
}
SAS device (no "ata_smart_attributes" key):
smartctl -a /dev/sdah -j
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
0
],
"svn_revision": "4883",
"platform_info": "x86_64-linux-3.10.0-514.26.2.el7.x86_64",
"build_info": "(local build)",
"argv": [
"smartctl",
"-a",
"/dev/sdah",
"-j"
],
"exit_status": 0
},
"device": {
"name": "/dev/sdah",
"info_name": "/dev/sdah",
"type": "scsi",
"protocol": "SCSI"
},
"vendor": "HGST",
"product": "HUS726060AL5210",
"model_name": "HGST HUS726060AL5210",
"revision": "A907",
"scsi_version": "SPC-4",
"user_capacity": {
"blocks": 11721045168,
"bytes": 6001175126016
},
"logical_block_size": 512,
"physical_block_size": 4096,
"rotation_rate": 7200,
"form_factor": {
"scsi_value": 2,
"name": "3.5 inches"
},
"serial_number": "NCHPB2YZ",
"device_type": {
"scsi_value": 0,
"name": "disk"
},
"local_time": {
"time_t": 1630032117,
"asctime": "Fri Aug 27 10:41:57 2021 CST"
},
"smart_status": {
"passed": true
},
"temperature": {
"current": 40,
"drive_trip": 85
},
"scsi_grown_defect_list": 0,
"scsi_error_counter_log": {
"read": {
"errors_corrected_by_eccfast": 0,
"errors_corrected_by_eccdelayed": 15,
"errors_corrected_by_rereads_rewrites": 0,
"total_errors_corrected": 15,
"correction_algorithm_invocations": 1781830,
"gigabytes_processed": "38534.502",
"total_uncorrected_errors": 0
},
"write": {
"errors_corrected_by_eccfast": 0,
"errors_corrected_by_eccdelayed": 0,
"errors_corrected_by_rereads_rewrites": 0,
"total_errors_corrected": 0,
"correction_algorithm_invocations": 14359899,
"gigabytes_processed": "65789.097",
"total_uncorrected_errors": 0
},
"verify": {
"errors_corrected_by_eccfast": 0,
"errors_corrected_by_eccdelayed": 0,
"errors_corrected_by_rereads_rewrites": 0,
"total_errors_corrected": 0,
"correction_algorithm_invocations": 454085,
"gigabytes_processed": "0.000",
"total_uncorrected_errors": 0
}
}
}
No data to display
Actions