For production environments, I recommend turning it into a modular RAC Monitoring Framework rather than a single script. That makes it easier to schedule, troubleshoot, and extend.
Oracle RAC Monitoring Framework
Directory Structure
rac_monitoring/
├── rac_health_check.sh
├── db_health.sql
├── asm_health.sql
├── wait_events.sql
├── blocking_sessions.sql
├── tablespace.sql
├── fra_usage.sql
├── archive_log.sql
├── cpu_memory.sh
├── alert_log.sh
├── generate_report.sh
├── reports/
├── logs/
└── config.env
1. Configuration File (config.env)
#!/bin/bash
# config.env - shared settings for the RAC monitoring scripts.
# Intended to be sourced, so every variable lands in the calling shell.

# Oracle software locations and the local instance; exported so that the
# Oracle command-line tools started by the caller inherit them.
ORACLE_BASE=/u01/app/oracle
GRID_HOME=/u01/app/19.0.0/grid
ORACLE_HOME=/u01/app/oracle/product/19.0.0/dbhome_1
ORACLE_SID=PROD1
PATH="${GRID_HOME}/bin:${ORACLE_HOME}/bin:${PATH}"
export ORACLE_BASE GRID_HOME ORACLE_HOME ORACLE_SID PATH

# Database name passed to srvctl by the health check.
DB_NAME=PROD

# Output locations; the timestamp is taken once so both files share it.
REPORT_DIR=/home/oracle/rac_monitoring/reports
LOG_DIR=/home/oracle/rac_monitoring/logs
DATE=$(date '+%Y%m%d_%H%M%S')
REPORT="${REPORT_DIR}/RAC_Health_${DATE}.html"
LOGFILE="${LOG_DIR}/RAC_Health_${DATE}.log"
2. RAC Health Check Script (rac_health_check.sh)
#!/bin/bash
#
# rac_health_check.sh - run the clusterware, ASM, database and network
# status commands and write their combined output to $LOGFILE.
#
# Requires config.env in the same directory as this script; it must define
# LOGFILE and DB_NAME and put the Grid/Oracle binaries on PATH.
#
# The individual checks are best effort: a failing or missing command is
# recorded in the log and the remaining checks still run, so `set -e` is
# deliberately not used.

# Locate config.env relative to this script rather than the caller's working
# directory, so the check also works from cron or any other directory.
script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) || exit 1

# shellcheck source=/dev/null
source "${script_dir}/config.env" || {
  printf 'ERROR: cannot load %s/config.env\n' "$script_dir" >&2
  exit 1
}

: "${LOGFILE:?LOGFILE must be set by config.env}"
: "${DB_NAME:?DB_NAME must be set by config.env}"

# Make sure the log directory exists, then send stdout AND stderr to the log
# so error messages from the Oracle tools are kept next to their section.
mkdir -p -- "$(dirname -- "$LOGFILE")" || exit 1
exec >"$LOGFILE" 2>&1

#######################################
# Print a banner, run one check command and finish with a blank line.
# Arguments: $1 - section title; remaining args - command to run
# Outputs:   banner and command output on stdout/stderr (the log file)
#######################################
section() {
  local title=$1
  shift
  echo "=============================="
  echo "$title"
  echo "=============================="
  "$@"
  echo
}

echo "==============================================="
echo "Oracle RAC Health Check"
echo "Server : $(hostname)"
echo "Date : $(date)"
echo "==============================================="
echo

section "Clusterware Status" crsctl check crs
section "Cluster Resources"  crsctl stat res -t
section "Node Status"        olsnodes -n -s
section "ASM Status"         srvctl status asm
section "Diskgroups"         asmcmd lsdg
section "Database Status"    srvctl status database -d "${DB_NAME}"
section "Services"           srvctl status service -d "${DB_NAME}"
section "Listener"           srvctl status listener
section "SCAN Listener"      srvctl status scan_listener
section "VIP"                srvctl status vip
section "OCR"                ocrcheck
section "Voting Disk"        crsctl query css votedisk

echo "Health Check Completed"
3. Wait Event Monitoring (wait_events.sql)
-- wait_events.sql
-- Top 20 wait events of the current instance, ranked by total time waited.
-- Idle-class events (for example "SQL*Net message from client") are left out:
-- they accumulate the largest wait times while indicating no contention, and
-- would otherwise crowd the real bottlenecks out of the top 20.
-- Per the Oracle reference, time_waited and average_wait are in hundredths
-- of a second.
set lines 200
col event format a45
SELECT
    event,
    total_waits,
    time_waited,
    average_wait
FROM v$system_event
WHERE wait_class <> 'Idle'
ORDER BY time_waited DESC
FETCH FIRST 20 ROWS ONLY;
4. Blocking Sessions (blocking_sessions.sql)
-- blocking_sessions.sql
-- Lists every session, on any RAC instance, that is currently blocked by
-- another session.
-- Session IDs are only unique within one instance, so the blocker is
-- identified by the pair (blocking_instance, blocking_session).
set lines 200
SELECT
    inst_id,
    sid,
    serial#,
    username,
    blocking_instance,
    blocking_session,
    seconds_in_wait,
    event
FROM gv$session
WHERE blocking_session IS NOT NULL;
5. ASM Monitoring (asm_health.sql)
-- asm_health.sql
-- State, redundancy type and free-space percentage of each ASM disk group.
-- NULLIF guards the division: a disk group that reports total_mb = 0
-- (e.g. one that is not mounted on this instance) would otherwise abort the
-- whole query with ORA-01476 (divisor is equal to zero). Such a group shows
-- a NULL FREE_PERCENT instead.
set lines 200
SELECT
    name,
    state,
    type,
    total_mb,
    free_mb,
    ROUND(free_mb * 100 / NULLIF(total_mb, 0), 2) FREE_PERCENT
FROM
    v$asm_diskgroup;
6. Tablespace Monitoring (tablespace.sql)
-- tablespace.sql
-- Used-space percentage per tablespace, fullest tablespace first.
SELECT tablespace_name,
       ROUND(used_percent, 2) AS USED_PERCENT
  FROM dba_tablespace_usage_metrics
 ORDER BY 2 DESC;
7. FRA Monitoring (fra_usage.sql)
-- fra_usage.sql
-- Fast recovery area quota, space in use and reclaimable space, with each
-- value divided by 1024 twice to report it in MB.
SELECT space_limit       / 1024 / 1024 AS MB_LIMIT,
       space_used        / 1024 / 1024 AS MB_USED,
       space_reclaimable / 1024 / 1024 AS MB_RECLAIMABLE
  FROM v$recovery_file_dest;
8. Archive Log Generation (archive_log.sql)
-- archive_log.sql
-- Archived redo per calendar day, newest day first: the day, the number of
-- v$archived_log rows, and their total size in GB (blocks * block_size).
-- NOTE(review): v$archived_log may hold one row per archive destination, so
-- with several destinations these figures could be multiplied - confirm
-- whether a destination filter is needed.
SELECT TRUNC(first_time),
       COUNT(*),
       ROUND(SUM(blocks * block_size) / 1024 / 1024 / 1024, 2) GB
  FROM v$archived_log
 GROUP BY TRUNC(first_time)
 ORDER BY TRUNC(first_time) DESC;
9. CPU & Memory Monitoring (cpu_memory.sh)
#!/bin/bash
# cpu_memory.sh - operating-system snapshot: CPU, memory, swap and disk usage.
# Each section is a banner followed by the raw output of one standard tool.

# First five lines of a single batch-mode top run.
printf '%s\n' "========== CPU =========="
top -b -n 1 | head -n 5
printf '\n'

# Memory totals in gigabytes.
printf '%s\n' "========== Memory =========="
free -g
printf '\n'

# Swap usage summary.
printf '%s\n' "========== Swap =========="
swapon -s
printf '\n'

# Filesystem usage in human-readable units.
printf '%s\n' "========== Disk =========="
df -h
10. Alert Log Monitoring (alert_log.sh)
#!/bin/bash
#
# alert_log.sh - print the last 200 lines of the database alert log via ADRCI.
#
# A RAC node typically exposes several ADR homes (database, ASM, listeners,
# clusterware). ADRCI only tails an alert log when exactly one home is
# selected, so the database instance's home is selected explicitly first.
#
# Environment (loaded from config.env next to this script when it is readable):
#   DB_NAME       database name, lower-cased to build the ADR home path
#   ORACLE_SID    name of the local instance
#   ADR_HOMEPATH  optional; overrides the derived diag/rdbms/<db>/<sid> path

script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) || exit 1
if [[ -r "${script_dir}/config.env" ]]; then
  # shellcheck source=/dev/null
  source "${script_dir}/config.env"
fi

homepath=${ADR_HOMEPATH:-}
if [[ -z "$homepath" ]]; then
  : "${DB_NAME:?DB_NAME must be set (see config.env)}"
  : "${ORACLE_SID:?ORACLE_SID must be set (see config.env)}"
  # NOTE(review): assumes the database unique name equals DB_NAME; set
  # ADR_HOMEPATH when it differs.
  homepath="diag/rdbms/${DB_NAME,,}/${ORACLE_SID}"
fi

adrci exec="set homepath ${homepath}; show alert -tail 200"
11. Cluster Log Collection
#!/bin/bash
# Collects clusterware diagnostic data by running Oracle's diagcollection.pl
# with the "--collect cluster" arguments.
# NOTE(review): diagcollection.pl must be resolvable through PATH; it
# presumably lives under the Grid home's bin directory - confirm.
# NOTE(review): verify that the installed Grid release accepts
# "--collect cluster" and which OS user is required to run the collection.
diagcollection.pl --collect cluster
12. Email Report
# Mail the health-check log as the message body.
# MAIL_TO overrides the recipient; LOGFILE is the log path set by config.env.
mail_to=${MAIL_TO:-shashi_dba@shashidba.com}
if [[ -r "${LOGFILE:-}" ]]; then
  mailx -s "Oracle RAC Health Report $(hostname)" "$mail_to" <"$LOGFILE"
else
  # Fail with a clear message instead of an obscure redirection error.
  printf 'ERROR: log file "%s" is missing or unreadable; report not mailed\n' \
    "${LOGFILE:-}" >&2
  exit 1
fi
13. Cron Scheduling
Run every hour:
# Hourly, on the hour. cd first: the script loads ./config.env relative to its working directory, which under cron is not the script directory.
0 * * * * cd /home/oracle/rac_monitoring && ./rac_health_check.sh
Run daily at 8 AM:
# Daily at 08:00. cd first: the script loads ./config.env relative to its working directory, which under cron is not the script directory.
0 8 * * * cd /home/oracle/rac_monitoring && ./rac_health_check.sh
Run every Sunday at 6 AM:
# Sundays at 06:00. cd first: the script loads ./config.env relative to its working directory, which under cron is not the script directory.
0 6 * * 0 cd /home/oracle/rac_monitoring && ./rac_health_check.sh
Sample Health Check Output (illustrative summary; the scripts above log the raw output of each command)
===================================================
Oracle RAC Health Check
===================================================
Hostname : racnode1
Date : 01-Jul-2026 08:00
✔ CRS Status ONLINE
✔ Cluster Resources ONLINE
✔ Node Status ACTIVE
✔ ASM RUNNING
✔ Diskgroups DATA, RECO, OCR
✔ Database PROD OPEN
✔ Services RUNNING
✔ Listener RUNNING
✔ SCAN RUNNING
✔ VIP RUNNING
✔ OCR HEALTHY
✔ Voting Disk NORMAL
Tablespace Usage
----------------------------
SYSTEM 72%
SYSAUX 61%
USERS 42%
TEMP 15%
ASM Usage
----------------------------
DATA 67%
RECO 58%
Blocking Sessions : NONE
Top Wait Event
----------------------------
db file sequential read
CPU Usage : 18%
Memory Usage : 63%
Overall RAC Health : PASS
No comments:
Post a Comment