已知限制:目前無法正確分詞八進制數字,以及帶有前綴或後綴的數字字面量。
拿這個詞法分析器跑了一遍整個 Nginx 源碼,基本都能正確地分出結果,後面有測試例子。
_STATES_H_
#include
_TOKEN_H_
#include <>
#include
NONE,
KEYWORD,
INT,
FLOAT,
SYMBOL,
VARIABLE,
token() :name(), line_no(
std::
set_all( std::& _line, set_name( std::& str){ name = set_line( l){ line_no = set_attr( token_attr a){ attr =
_SCANNER_H_
#include <algorithm>
#include <>
#include <vector>
#include
#include
scanner( std::& _code) :code(_code), index(), line_no(
std::vector<token> inline is_num( & ch);
inline is_alpha( & ch);
inline is_symbol( & ch);
inline is_variable( & ch);
inline inline std:: std::
std::vector<token>
#include <cctype>
#include <fstream>
#include <iostream>
#include <>
#include
#include
#include
#include
std::vector<token> std:: line = _states state = ch =
(isblank(ch) || isspace(ch) || ch == (ch == ++ ch =
( ch = (ch == && state != ++ (ch == EOF || index ==
IN_NONE:
str = state = str = state = (ch == || ch == ||
ch == || ch == ||
ch == || ch == ||
ch == ||
ch == || ch == ||
ch == || ch == str = -> state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state =
str += state = (ch == || ch == str += state = (ch == str += state =
(str == && (ch == || ch == str += state =
-> state =
(is_num(ch) ||
(ch >= && ch <= ) ||
(ch >= && ch <= str += state =
-> state =
(ch == str += state = (ch == str += state = str += state =
(is_alpha(ch) || is_num(ch) || ch == str += state =
(keywords.find(str) ==
-> state =
(ch == {
str += -> state = (ch == {
str += -> state = (ch == {
str += -> state = {
str += state =
{
-> state =
(ch == {
str += -> state =
{
-> state =
(ch == {
str += -> state = (ch == {
str += -> state = {
str += state =
{
-> state =
(ch == {
str += -> state =
{
-> state =
(ch == {
str += -> state = (ch == {
str += -> state =
{
-> state =
(ch == {
str += -> state = (ch == { state = (ch == {
state =
{
-> state =
(ch == ch = (ch == state =
state =
state =
(ch == state =
state =
(ch == {
str += -> state =
{
-> state =
(ch == {
str += -> state = (ch == {
str += -> state =
{
-> state =
(ch == {
str += -> state = (ch == {
str += -> state =
{
-> state =
(ch == {
str += -> state =
{
-> state =
(ch == {
str += -> state =
{
-> state =
(ch == {
str += -> state = (ch == {
str += -> state =
{
-> state =
str += state =
-> state =
(ch == {
str += ch = (ch == str +=
state = (ch == str += -> state =
str += state =
(ch == str += -> state =
-> state =
(ch != str += state =
str += -> state =
}
}
scanner::is_alpha( & ((ch >= && ch <= ) ||
(ch >= && ch <= scanner::is_num( & (ch >= && ch <= scanner::is_variable( & (ch == || code[index++ --
__LOG_END_NAMESPACE
測試如下:
#include <iostream>
#include <vector>
#include <fstream>
#include
file_name( ofstream ofs( ifs.seekg( std::size_t len = ifs.seekg( _code.resize(len + ifs.read((*)&* _code[len + ] = vector<token> v = ( ofs << s.name << << s.line_no <<
system( }
分詞測試所用的代碼選自 Nginx 源碼中的函數 ngx_epoll_process_events:
ngx_epoll_process_events(ngx_cycle_t * ngx_event_t *rev, *wev, ** ngx_connection_t *
i = , ,
ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log,
events = epoll_wait(ep, event_list, (
err = (events == -) ? ngx_errno :
(flags & NGX_UPDATE_TIME ||
(err ==
ngx_event_timer_alarm =
level =
} level =
ngx_log_error(level, cycle->log, err,
(events == (timer !=
ngx_log_error(NGX_LOG_ALERT, cycle->log,
(i = ; i < events; i++ c =
instance = (uintptr_t) c & c = (ngx_connection_t *) ((uintptr_t) c & (uintptr_t) ~
rev = c->
(c->fd == - || rev->instance !=
ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log,
revents =
ngx_log_debug3(NGX_LOG_DEBUG_EVENT, cycle->log, c->
(revents & (EPOLLERR| ngx_log_debug2(NGX_LOG_DEBUG_EVENT, cycle->log, c->
0
(revents & ~(EPOLLIN|EPOLLOUT|EPOLLERR| ngx_log_error(NGX_LOG_ALERT, cycle->log, c->
((revents & (EPOLLERR| && (revents & (EPOLLIN|EPOLLOUT)) ==
revents |= EPOLLIN|
((revents & EPOLLIN) && rev->
(NGX_HAVE_EPOLLRDHUP)
(revents & rev->pending_eof =
((flags & NGX_POST_THREAD_EVENTS) && !rev-> rev->posted_ready =
} rev->ready =
(flags & queue = (ngx_event_t **) (rev->accept ?
&ngx_posted_accept_events : &
} rev->
wev = c->
((revents & EPOLLOUT) && wev->
(c->fd == - || wev->instance !=
ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log,
(flags & wev->posted_ready =
} wev->ready =
(flags & ngx_locked_post_event(wev, &
} wev->
}
分詞結果如下:
ngx_int_t
ngx_epoll_process_events
(
ngx_cycle_t
*
cycle
,
ngx_msec_t
timer
,
ngx_uint_t
flags
)
{
events
;
uint32_t
revents
;
ngx_int_t
instance
,
i
;
ngx_uint_t
level
;
ngx_err_t
err
;
ngx_event_t
*
rev
,
*
wev
,
*
*
queue
;
ngx_connection_t
*
c
;
i
=
,
,
;
ngx_log_debug1
(
NGX_LOG_DEBUG_EVENT
,
cycle
->
log
,
,
,
timer
)
;
events
=
epoll_wait
(
ep
,
event_list
,
(
)
nevents
,
timer
)
;
err
=
(
events
==
-
)
?
ngx_errno
:
;
(
flags
&
NGX_UPDATE_TIME
||
ngx_event_timer_alarm
)
{
ngx_time_update
(
)
;
}
(
err
)
{
(
err
==
NGX_EINTR
)
{
(
ngx_event_timer_alarm
)
{
ngx_event_timer_alarm
=
;
NGX_OK
;
}
level
=
NGX_LOG_INFO
;
}
{
level
=
NGX_LOG_ALERT
;
}
ngx_log_error
(
level
,
cycle
->
log
,
err
,
)
;
NGX_ERROR
;
}
(
events
==
)
{
(
timer
!=
NGX_TIMER_INFINITE
)
{
NGX_OK
;
}
ngx_log_error
(
NGX_LOG_ALERT
,
cycle
->
log
,
,
)
;
NGX_ERROR
;
}
ngx_mutex_lock
(
ngx_posted_events_mutex
)
;
(
i
=
;
i
<
events
;
i
++
)
{
c
=
event_list
[
i
]
.
data
.
ptr
;
instance
=
(
uintptr_t
)
c
&
;
c
=
(
ngx_connection_t
*
)
(
(
uintptr_t
)
c
&
(
uintptr_t
)
~
)
;
rev
=
c
->
read
;
(
c
->
fd
==
-
||
rev
->
instance
!=
instance
)
{
ngx_log_debug1
(
NGX_LOG_DEBUG_EVENT
,
cycle
->
log
,
,
,
c
)
;
;
}
revents
=
event_list
[
i
]
.
events
;
ngx_log_debug3
(
NGX_LOG_DEBUG_EVENT
,
cycle
->
log
,
,
,
c
->
fd
,
revents
,
event_list
[
i
]
.
data
.
ptr
)
;
(
revents
&
(
EPOLLERR
|
EPOLLHUP
)
)
{
ngx_log_debug2
(
NGX_LOG_DEBUG_EVENT
,
cycle
->
log
,
,
,
c
->
fd
,
revents
)
;
}
#
(
revents
&
~
(
EPOLLIN
|
EPOLLOUT
|
EPOLLERR
|
EPOLLHUP
)
)
{
ngx_log_error
(
NGX_LOG_ALERT
,
cycle
->
log
,
,
,
c
->
fd
,
revents
)
;
}
#
endif
(
(
revents
&
(
EPOLLERR
|
EPOLLHUP
)
)
&&
(
revents
&
(
EPOLLIN
|
EPOLLOUT
)
)
==
)
{
revents
|=
EPOLLIN
|
EPOLLOUT
;
}
(
(
revents
&
EPOLLIN
)
&&
rev
->
active
)
{
#
(
NGX_HAVE_EPOLLRDHUP
)
(
revents
&
EPOLLRDHUP
)
{
rev
->
pending_eof
=
;
}
#
endif
(
(
flags
&
NGX_POST_THREAD_EVENTS
)
&&
!
rev
->
accept
)
{
rev
->
posted_ready
=
;
}
{
rev
->
ready
=
;
}
(
flags
&
NGX_POST_EVENTS
)
{
queue
=
(
ngx_event_t
*
*
)
(
rev
->
accept
?
&
ngx_posted_accept_events
:
&
ngx_posted_events
)
;
ngx_locked_post_event
(
rev
,
queue
)
;
}
{
rev
->
handler
(
rev
)
;
}
}
wev
=
c
->
write
;
(
(
revents
&
EPOLLOUT
)
&&
wev
->
active
)
{
(
c
->
fd
==
-
||
wev
->
instance
!=
instance
)
{
ngx_log_debug1
(
NGX_LOG_DEBUG_EVENT
,
cycle
->
log
,
,
,
c
)
;
;
}
(
flags
&
NGX_POST_THREAD_EVENTS
)
{
wev
->
posted_ready
=
;
}
{
wev
->
ready
=
;
}
(
flags
&
NGX_POST_EVENTS
)
{
ngx_locked_post_event
(
wev
,
&
ngx_posted_events
)
;
}
{
wev
->
handler
(
wev
)
;
}
}
}
ngx_mutex_unlock
(
ngx_posted_events_mutex
)
;
NGX_OK
;
}