|
今天接到一个用户的电话,说那边的服务器有问题,让我过去看看。
环境:RedHat AS3 (Taroon) + 2G ECC内存 + 4块36G的SCSI硬盘(RAID 5)
kernel: 2.4.21-4.ELsmp
oracle 9i(04)
故障描述:
此服务器为数据库服务器,不定期宕机。宕机后键盘操作无效,只能按RESET键重启。
虽然服务器只跑数据库,但里面安装的东西很多很杂,不过我认为这不是造成宕机的原因。最近一次宕机大约发生在06年2月15号晚上9:00-10:30之间。我看了一下日志,没有发现什么异常。在此,我把一些数据贴上来,希望有达人帮我找找原因,小弟感激涕零。
-------------------------------------------------------------------
ipcs -sa的运行结果
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x0102c718 0 root 600 512000 2
0x87383bec 2097153 orcl 640 255852544 70
0x00000000 2064386 gdm 600 393216 2 dest
------ Semaphore Arrays --------
key semid owner perms nsems
0x00000000 65536 hpsmh 600 1
0x00000000 98305 hpsmh 600 1
0x00000000 131074 hpsmh 600 1
0xb75aa078 393219 orcl 640 154
------ Message Queues --------
key msqid owner perms used-bytes messages
--------------------------------------------------------------------------
more /proc/sys/kernel/shmmax的运行结果
4294967295
--------------------------------------------------------------------------
ps -ef的运行结果
UID PID PPID C STIME TTY TIME CMD
root 1 0 0 Feb20 ? 00:00:05 init
root 2 0 0 Feb20 ? 00:00:00 [migration/0]
root 3 0 0 Feb20 ? 00:00:00 [migration/1]
root 4 1 0 Feb20 ? 00:00:00 [keventd]
root 5 1 0 Feb20 ? 00:00:00 [ksoftirqd/0]
root 6 1 0 Feb20 ? 00:00:00 [ksoftirqd/1]
root 9 1 0 Feb20 ? 00:00:00 [bdflush]
root 7 1 0 Feb20 ? 00:00:06 [kswapd]
root 8 1 0 Feb20 ? 00:00:01 [kscand]
root 10 1 0 Feb20 ? 00:00:00 [kupdated]
root 11 1 0 Feb20 ? 00:00:00 [mdrecoveryd]
root 21 1 0 Feb20 ? 00:00:01 [kjournald]
root 78 1 0 Feb20 ? 00:00:00 [khubd]
root 467 1 0 Feb20 ? 00:00:00 [kjournald]
root 468 1 0 Feb20 ? 00:00:01 [kjournald]
root 469 1 0 Feb20 ? 00:00:00 [kjournald]
root 470 1 0 Feb20 ? 00:00:00 [kjournald]
root 471 1 0 Feb20 ? 00:00:00 [kjournald]
root 1912 1 0 Feb20 ? 00:00:00 syslogd -m 0
root 1916 1 0 Feb20 ? 00:00:00 klogd -x
root 1926 1 0 Feb20 ? 00:00:04 irqbalance
rpcuser 1962 1 0 Feb20 ? 00:00:00 rpc.statd
root 2102 1 0 Feb20 ? 00:00:00 /usr/sbin/sshd
root 2116 1 0 Feb20 ? 00:00:00 xinetd -stayalive -pidfile /var/
root 2133 1 0 Feb20 ? 00:00:00 /usr/sbin/vsftpd /etc/vsftpd/vsf
root 2154 1 0 Feb20 ? 00:00:01 sendmail: accepting connections
smmsp 2163 1 0 Feb20 ? 00:00:00 sendmail: Queue runner@01:00:00
root 2173 1 0 Feb20 ? 00:00:01 gpm -t ps/2 -m /dev/mouse
root 2353 1 0 Feb20 ? 00:00:02 /opt/hp/hpsmh/sbin/hpsmhd -DSSL
root 2362 1 0 Feb20 ? 00:00:00 crond
hpsmh 2363 2353 0 Feb20 ? 00:00:00 /opt/hp/hpsmh/sbin/hpsmhd -DSSL
xfs 2424 1 0 Feb20 ? 00:00:00 xfs -droppriv -daemon
daemon 2439 1 0 Feb20 ? 00:00:00 /usr/sbin/atd
root 2448 1 0 Feb20 ? 00:00:00 /usr/sbin/saslauthd -m /var/run/
root 2449 2448 0 Feb20 ? 00:00:00 /usr/sbin/saslauthd -m /var/run/
root 2450 2448 0 Feb20 ? 00:00:00 /usr/sbin/saslauthd -m /var/run/
root 2451 2448 0 Feb20 ? 00:00:00 /usr/sbin/saslauthd -m /var/run/
root 2453 2448 0 Feb20 ? 00:00:00 /usr/sbin/saslauthd -m /var/run/
root 2526 1 0 Feb20 ? 00:00:00 /opt/CA/BABcmagt/caagentd
orcl 2676 1 0 Feb20 ? 00:00:00 /u01/product/9.2.0/bin/tnslsnr L
root 2800 1 0 Feb20 ? 00:00:00 login -- root
root 2801 1 0 Feb20 tty2 00:00:00 /sbin/mingetty tty2
root 2802 1 0 Feb20 tty3 00:00:00 /sbin/mingetty tty3
root 2803 1 0 Feb20 tty4 00:00:00 /sbin/mingetty tty4
root 2804 1 0 Feb20 tty5 00:00:00 /sbin/mingetty tty5
root 2805 1 0 Feb20 ? 00:00:00 login -- root
root 2806 1 0 Feb20 ? 00:00:00 /usr/bin/gdm-binary -nodaemon
root 2865 2806 0 Feb20 ? 00:00:00 /usr/bin/gdm-binary -nodaemon
root 4022 2865 0 Feb20 ? 00:03:40 /usr/X11R6/bin/X :0 -auth /var/g
orcl 4355 1 0 01:19 ? 00:00:03 ora_pmon_zbdb
orcl 4357 1 0 01:19 ? 00:00:00 ora_dbw0_zbdb
orcl 4359 1 0 01:19 ? 00:00:00 ora_lgwr_zbdb
orcl 4361 1 0 01:19 ? 00:00:02 ora_ckpt_zbdb
orcl 4363 1 0 01:19 ? 00:00:00 ora_smon_zbdb
orcl 4365 1 0 01:19 ? 00:00:00 ora_reco_zbdb
orcl 4367 1 0 01:19 ? 00:00:01 ora_cjq0_zbdb
orcl 4369 1 0 01:19 ? 00:00:00 ora_s000_zbdb
orcl 4371 1 0 01:19 ? 00:00:00 ora_d000_zbdb
orcl 4373 1 0 01:19 ? 00:00:00 ora_arc0_zbdb
orcl 4375 1 0 01:19 ? 00:00:00 ora_arc1_zbdb
root 4675 1 0 04:03 ? 00:00:00 cupsd
orcl 4943 1 0 09:00 ? 00:00:00 oraclezbdb (LOCAL=NO)
orcl 4949 1 0 09:11 ? 00:00:00 oraclezbdb (LOCAL=NO)
orcl 4951 1 0 09:17 ? 00:00:00 oraclezbdb (LOCAL=NO)
root 4984 2805 0 10:54 tty6 00:00:00 -bash
root 5065 2865 0 11:04 ? 00:00:00 /usr/bin/gnome-session
root 5136 5065 0 11:04 ? 00:00:00 /usr/bin/ssh-agent /etc/X11/xini
root 5146 1 0 11:04 ? 00:00:00 /usr/libexec/gconfd-2 5
root 5149 1 0 11:04 ? 00:00:00 /usr/libexec/bonobo-activation-s
root 5151 1 0 11:04 ? 00:00:00 gnome-settings-daemon --oaf-acti
root 5156 2116 0 11:04 ? 00:00:00 fam
root 5163 1 0 11:04 ? 00:00:00 /usr/bin/metacity --sm-client-id
root 5167 1 0 11:04 ? 00:00:00 gnome-panel --sm-client-id defau
root 5169 1 0 11:04 ? 00:00:00 nautilus --no-default-window --s
root 5171 1 0 11:04 ? 00:00:01 magicdev --sm-client-id default4
root 5173 1 0 11:04 ? 00:00:00 eggcups --sm-client-id default6
root 5175 1 0 11:04 ? 00:00:00 pam-panel-icon --sm-client-id de
root 5177 1 0 11:04 ? 00:00:10 /usr/bin/python /usr/bin/rhn-app
root 5178 5175 0 11:04 ? 00:00:00 /sbin/pam_timestamp_check -d roo
root 5188 1 0 11:04 ? 00:00:00 /usr/libexec/notification-area-a
root 5189 2800 0 11:05 tty1 00:00:00 -bash
orcl 5550 1 62 11:25 ? 00:03:26 oraclezbdb (LOCAL=NO)
root 5696 5189 0 11:31 tty1 00:00:00 ps -ef
----------------------------------------------------------------------------------------------------------------------------------
还有一些日志文件,我打包放在 http://www.arteduinfo.com/mrcool/log.tar.gz
谢谢大家了! |
|