OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [rtos/] [ecos-2.0/] [tools/] [src/] [tools/] [configtool/] [standalone/] [wxwin/] [htmlparser.cpp] - Blame information for rev 294

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 26 unneback
//####COPYRIGHTBEGIN####
2
//
3
// ----------------------------------------------------------------------------
4
// Copyright (C) 1998, 1999, 2000 Red Hat, Inc.
5
//
6
// This program is part of the eCos host tools.
7
//
8
// This program is free software; you can redistribute it and/or modify it
9
// under the terms of the GNU General Public License as published by the Free
10
// Software Foundation; either version 2 of the License, or (at your option)
11
// any later version.
12
//
13
// This program is distributed in the hope that it will be useful, but WITHOUT
14
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16
// more details.
17
//
18
// You should have received a copy of the GNU General Public License along with
19
// this program; if not, write to the Free Software Foundation, Inc.,
20
// 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21
//
22
// ----------------------------------------------------------------------------
23
//
24
//####COPYRIGHTEND####
25
// htmlparser.cpp :
26
//
27
//===========================================================================
28
//#####DESCRIPTIONBEGIN####
29
//
30
// Author(s):   julians
31
// Contact(s):  julians
32
// Date:        2001/04/04
33
// Version:     $Id: htmlparser.cpp,v 1.1.1.1 2004-02-14 13:28:46 phoenix Exp $
34
// Purpose:
35
// Description: HTML parser/HTML Help file generator
36
// Requires:
37
// Provides:
38
// See also:
39
// Known bugs:
40
// Usage:
41
//
42
//####DESCRIPTIONEND####
43
//
44
//===========================================================================
45
 
46
// ============================================================================
47
// declarations
48
// ============================================================================
49
 
50
// ----------------------------------------------------------------------------
51
// headers
52
// ----------------------------------------------------------------------------
53
#ifdef __GNUG__
54
#pragma implementation "htmlparser.h"
55
#endif
56
 
57
// Includes other headers for precompiled compilation
58
#include "ecpch.h"
59
 
60
#ifdef __BORLANDC__
61
#pragma hdrstop
62
#endif
63
 
64
#include "wx/textfile.h"
65
#include "wx/wfstream.h"
66
 
67
#include "ecutils.h"
68
#include "htmlparser.h"
69
 
70
/*
 * wxSimpleHtmlParser
 * Simple HTML parser that builds a flat list of tags and text chunks
 */
74
 
75
// Construct an empty parser: no parsed document and the read position at
// the start of the (empty) text.
wxSimpleHtmlParser::wxSimpleHtmlParser()
{
    m_topLevel = NULL;
    m_pos = 0;
    // m_length is read by GetChar(); initialise it so the character helpers
    // are safe to call before any text has been handed to ParseString().
    m_length = 0;
}
80
 
81
 
82
// Destructor: releases the parsed tags and the buffered text through Clear().
wxSimpleHtmlParser::~wxSimpleHtmlParser()
{
    Clear();
}
86
 
87
bool wxSimpleHtmlParser::ParseFile(const wxString& filename)
88
{
89
    wxTextFile textFile;
90
 
91
    if (textFile.Open(filename))
92
    {
93
        wxString text;
94
        wxString line;
95
        int i;
96
        int count = textFile.GetLineCount();
97
        for (i = 0; i < count; i++)
98
        {
99
            if (i == 0)
100
                line = textFile.GetFirstLine();
101
            else
102
                line = textFile.GetNextLine();
103
 
104
            text += line;
105
            if (i != (count - 1))
106
                text += wxT("\n");
107
        }
108
 
109
#if 0
110
        for ( line = textFile.GetFirstLine(); !textFile.Eof(); line = textFile.GetNextLine() )
111
        {
112
            text += line;
113
            if (!textFile.Eof())
114
                text += wxT("\n");
115
        }
116
#endif
117
 
118
        return ParseString(text);
119
    }
120
    else
121
        return FALSE;
122
}
123
 
124
bool wxSimpleHtmlParser::ParseString(const wxString& str)
125
{
126
    Clear();
127
 
128
    m_pos = 0;
129
    m_text = str;
130
    m_length = str.Length();
131
 
132
    m_topLevel = new wxSimpleHtmlTag(wxT("TOPLEVEL"), wxSimpleHtmlTag_TopLevel);
133
 
134
    return ParseHtml(m_topLevel);
135
}
136
 
137
// Main recursive parsing function
138
bool wxSimpleHtmlParser::ParseHtml(wxSimpleHtmlTag* parent)
139
{
140
    while (!Eof())
141
    {
142
        EatWhitespace();
143
        if (IsComment())
144
        {
145
            ParseComment();
146
        }
147
        else if (IsDirective())
148
        {
149
            wxSimpleHtmlTag* tag = ParseDirective();
150
            if (tag)
151
                parent->AppendTag(tag);
152
        }
153
        else if (IsTagClose())
154
        {
155
            wxSimpleHtmlTag* tag = ParseTagClose();
156
            if (tag)
157
                parent->AppendTag(tag);
158
        }
159
        else if (IsTagStartBracket(GetChar(m_pos)))
160
        {
161
            wxSimpleHtmlTag* tag = ParseTagHeader();
162
            if (tag)
163
                parent->AppendTag(tag);
164
        }
165
        else
166
        {
167
            // Just a text string
168
            wxString text;
169
            ParseText(text);
170
 
171
            wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(wxT("TEXT"), wxSimpleHtmlTag_Text);
172
            tag->SetText(text);
173
            parent->AppendTag(tag);
174
        }
175
    }
176
    return TRUE;
177
}
178
 
179
// Plain text, up until an angled bracket
180
bool wxSimpleHtmlParser::ParseText(wxString& text)
181
{
182
    while (!Eof() && GetChar(m_pos) != wxT('<'))
183
    {
184
        text += GetChar(m_pos);
185
        m_pos ++;
186
    }
187
    return TRUE;
188
}
189
 
190
// Parse an opening tag, starting at its '<'.
//
// Returns a tag of type wxSimpleHtmlTag_Open, allocated with new, holding
// the tag name and its attributes, or NULL if the current character is not
// '<'. The name is left empty when no word follows the '<'.
wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagHeader()
{
    if (!IsTagStartBracket(GetChar(m_pos)))
        return NULL;

    // Step over the '<'
    m_pos ++;
    EatWhitespace();

    wxString tagName;
    ReadWord(tagName, TRUE);
    EatWhitespace();

    wxSimpleHtmlTag* result = new wxSimpleHtmlTag(tagName, wxSimpleHtmlTag_Open);
    ParseAttributes(result);
    EatWhitespace();

    // Consume the closing '>' when it is present
    if (IsTagEndBracket(GetChar(m_pos)))
        m_pos ++;

    return result;
}
216
 
217
// Parse a closing tag, starting at its "</".
//
// Returns a tag of type wxSimpleHtmlTag_Close, allocated with new, carrying
// the tag name (empty when no word follows the "</").
wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagClose()
{
    Matches(wxT("</"), TRUE);
    EatWhitespace();

    wxString word;
    ReadWord(word, TRUE);
    EatWhitespace();

    // Only step over the terminating '>' when it is really there, as
    // ParseTagHeader() and ParseDirective() do. Advancing unconditionally
    // would swallow the first character of whatever follows a malformed
    // close tag, for example the '<' of the next tag.
    if (IsTagEndBracket(GetChar(m_pos)))
        m_pos ++;

    return new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Close);
}
232
 
233
// Parse the attributes of a tag header or directive, appending each one to
// 'tag'. Parsing stops at (without consuming) the closing '>' or at the end
// of the input.
//
// Three forms are recognised:
//   "quoted string"  - stored as an attribute name with an empty value
//   number           - stored as an attribute name with an empty value
//   name [= value]   - value is either a quoted string or a bare literal
//
// Always returns TRUE.
bool wxSimpleHtmlParser::ParseAttributes(wxSimpleHtmlTag* tag)
{
    while (!IsTagEndBracket(GetChar(m_pos)) && !Eof())
    {
        EatWhitespace();

        wxString name, value;

        if (IsString())
        {
            ReadString(name, TRUE);
            tag->AppendAttribute(name, wxEmptyString);
            continue;
        }

        if (IsNumeric(GetChar(m_pos)))
        {
            ReadNumber(name, TRUE);
            tag->AppendAttribute(name, wxEmptyString);
            continue;
        }

        // A name, optionally followed by '=' and a value
        ReadLiteral(name, TRUE);
        EatWhitespace();

        const bool hasValue = (GetChar(m_pos) == wxT('='));
        if (hasValue)
        {
            // Step over the '='
            m_pos ++;
            EatWhitespace();

            if (IsString())
                ReadString(value, TRUE);
            else if (!Eof() && !IsTagEndBracket(GetChar(m_pos)))
                ReadLiteral(value, TRUE);
        }

        // Nothing is stored when no name could be read
        if (!name.IsEmpty())
            tag->AppendAttribute(name, value);
    }
    return TRUE;
}
275
 
276
// e.g. <!DOCTYPE ....>
277
wxSimpleHtmlTag* wxSimpleHtmlParser::ParseDirective()
278
{
279
    Matches(wxT("<!"), TRUE);
280
 
281
    EatWhitespace();
282
 
283
    wxString word;
284
    ReadWord(word, TRUE);
285
 
286
    EatWhitespace();
287
 
288
    wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Directive);
289
 
290
    ParseAttributes(tag);
291
 
292
    EatWhitespace();
293
 
294
    if (IsTagEndBracket(GetChar(m_pos)))
295
        m_pos ++;
296
 
297
    return tag;
298
}
299
 
300
bool wxSimpleHtmlParser::ParseComment()
301
{
302
    // Eat the comment tag start
303
    Matches(wxT("<!--"), TRUE);
304
 
305
    while (!Eof() && !Matches(wxT("-->"), TRUE))
306
    {
307
        m_pos ++;
308
    }
309
 
310
    return TRUE;
311
}
312
 
313
bool wxSimpleHtmlParser::EatWhitespace()
314
{
315
    while (!Eof() && IsWhitespace(GetChar(m_pos)))
316
        m_pos ++;
317
    return TRUE;
318
}
319
 
320
bool wxSimpleHtmlParser::EatWhitespace(int& pos)
321
{
322
    while (!Eof(pos) && IsWhitespace(GetChar(pos)))
323
        pos ++;
324
    return TRUE;
325
}
326
 
327
// Read a double-quoted string starting at the current position.
//
// The characters between the quotes are appended to 'str' (the quotes
// themselves are not). A string with no closing quote runs to the end of the
// input. When 'eatIt' is TRUE the current position is moved past the string.
//
// Returns FALSE, leaving 'str' and the position untouched, if the current
// character is not a double quote; TRUE otherwise.
bool wxSimpleHtmlParser::ReadString(wxString& str, bool eatIt)
{
    const int quote = (int) '"';

    int pos = m_pos;
    if (GetChar(pos) != quote)
        return FALSE;

    // Skip the opening quote, then copy up to the closing one.
    // TODO: how are quotes escaped in HTML?
    for (pos ++; !Eof(pos) && GetChar(pos) != quote; pos ++)
        str += (wxChar) GetChar(pos);

    // Step over the closing quote when there is one
    if (GetChar(pos) == quote)
        pos ++;

    if (eatIt)
        m_pos = pos;
    return TRUE;
}
348
 
349
// Read a word starting at the current position: one alphabetic character
// followed by any number of word characters (see IsWordChar()).
//
// The word is appended to 'str'. When 'eatIt' is TRUE the current position
// is moved past it. Returns FALSE, leaving 'str' and the position untouched,
// if the current character is not alphabetic; TRUE otherwise.
bool wxSimpleHtmlParser::ReadWord(wxString& str, bool eatIt)
{
    int pos = m_pos;
    if (!IsAlpha(GetChar(pos)))
        return FALSE;

    // The first character has already been validated, so it is always taken
    do
    {
        str += (wxChar) GetChar(pos);
        pos ++;
    }
    while (!Eof(pos) && IsWordChar(GetChar(pos)));

    if (eatIt)
        m_pos = pos;
    return TRUE;
}
368
 
369
// Read a run of numeric characters (see IsNumeric()) starting at the current
// position and append it to 'str'.
//
// When 'eatIt' is TRUE the current position is moved past the run.
// Returns FALSE, leaving 'str' and the position untouched, if the current
// character is not numeric; TRUE otherwise.
bool wxSimpleHtmlParser::ReadNumber(wxString& str, bool eatIt)
{
    int pos = m_pos;
    if (!IsNumeric(GetChar(pos)))
        return FALSE;

    // The first character has already been validated, so it is always taken
    do
    {
        str += (wxChar) GetChar(pos);
        pos ++;
    }
    while (!Eof(pos) && IsNumeric(GetChar(pos)));

    if (eatIt)
        m_pos = pos;
    return TRUE;
}
388
 
389
// Could be number, string, whatever, but read up until whitespace or end of tag (but not a quoted string)
390
bool wxSimpleHtmlParser::ReadLiteral(wxString& str, bool eatIt)
391
{
392
    int pos = m_pos;
393
 
394
    while (!Eof(pos) && !IsWhitespace(GetChar(pos)) && !IsTagEndBracket(GetChar(pos)) && GetChar(pos) != wxT('='))
395
    {
396
        str += GetChar(pos);
397
        pos ++;
398
    }
399
    if (eatIt)
400
        m_pos = pos;
401
    return TRUE;
402
}
403
 
404
bool wxSimpleHtmlParser::IsTagClose()
405
{
406
    return Matches(wxT("</"));
407
}
408
 
409
bool wxSimpleHtmlParser::IsComment()
410
{
411
    return Matches(wxT("<!--"));
412
}
413
 
414
bool wxSimpleHtmlParser::IsDirective()
415
{
416
    return Matches(wxT("<!"));
417
}
418
 
419
bool wxSimpleHtmlParser::IsString()
420
{
421
    return (GetChar(m_pos) == (int) '"') ;
422
}
423
 
424
bool wxSimpleHtmlParser::IsWord()
425
{
426
    return (IsAlpha(GetChar(m_pos)));
427
}
428
 
429
bool wxSimpleHtmlParser::IsTagStartBracket(int ch)
430
{
431
    return (ch == wxT('<'));
432
}
433
 
434
bool wxSimpleHtmlParser::IsTagEndBracket(int ch)
435
{
436
    return (ch == wxT('>'));
437
}
438
 
439
bool wxSimpleHtmlParser::IsWhitespace(int ch)
440
{
441
    return ((ch == 13) || (ch == 10) || (ch == 32) || (ch == (int) '\t')) ;
442
}
443
 
444
bool wxSimpleHtmlParser::IsAlpha(int ch)
445
{
446
    return (wxIsalpha((wxChar) ch) != 0);
447
}
448
 
449
bool wxSimpleHtmlParser::IsWordChar(int ch)
450
{
451
    return (wxIsalpha((wxChar) ch) != 0 || ch == wxT('-') || ch == wxT('_') || IsNumeric(ch));
452
}
453
 
454
bool wxSimpleHtmlParser::IsNumeric(int ch)
455
{
456
    return (wxIsdigit((wxChar) ch) != 0 || ch == wxT('-') || ch == wxT('.')) ;
457
}
458
 
459
// Matches this string (case insensitive)
460
bool wxSimpleHtmlParser::Matches(const wxString& tok, bool eatIt)
461
{
462
    wxString text(m_text.Mid(m_pos, tok.Length()));
463
    bool success = (text.CmpNoCase(tok) == 0) ;
464
    if (success && eatIt)
465
    {
466
        m_pos += tok.Length();
467
    }
468
    return success;
469
}
470
 
471
// Safe way of getting a character
472
int wxSimpleHtmlParser::GetChar(size_t i) const
473
{
474
    if (i >= m_length)
475
        return -1;
476
    return m_text[i];
477
}
478
 
479
void wxSimpleHtmlParser::Clear()
480
{
481
    if (m_topLevel)
482
        delete m_topLevel;
483
    m_topLevel = NULL;
484
    m_text = wxEmptyString;
485
    m_pos = 0;
486
    m_length = 0;
487
}
488
 
489
// Write this file
490
void wxSimpleHtmlParser::Write(wxOutputStream& stream)
491
{
492
    if (m_topLevel)
493
        m_topLevel->Write(stream);
494
}
495
 
496
// Write the parsed document to the file named 'filename'.
// Returns FALSE if the output stream could not be opened, TRUE otherwise.
bool wxSimpleHtmlParser::WriteFile(wxString& filename)
{
    wxFileOutputStream outFile(filename);
    if (!outFile.Ok())
        return FALSE;

    Write(outFile);
    return TRUE;
}
507
 
508
/*
509
 * wxSimpleHtmlTag
510
 * Representation of a tag or chunk of text
511
 */
512
 
513
// Create a tag with the given name and type. The new tag has no attributes,
// no children, no parent and no following sibling.
wxSimpleHtmlTag::wxSimpleHtmlTag(const wxString& tagName, int tagType)
{
    // Identity
    m_type = tagType;
    m_name = tagName;

    // Not yet linked to anything
    m_parent = NULL;
    m_next = NULL;
    m_children = NULL;
    m_attributes = NULL;
}
522
 
523
// Destructor: deletes this tag's attributes and all of its children.
wxSimpleHtmlTag::~wxSimpleHtmlTag()
{
    ClearAttributes();
    ClearChildren();
}
528
 
529
//// Operations
530
void wxSimpleHtmlTag::ClearAttributes()
531
{
532
    if (m_attributes)
533
    {
534
        wxSimpleHtmlAttribute* attr = m_attributes;
535
        while (attr)
536
        {
537
            wxSimpleHtmlAttribute* next = attr->m_next;
538
 
539
            attr->m_next = NULL;
540
            delete attr;
541
            attr = next;
542
        }
543
        m_attributes = NULL;
544
    }
545
}
546
 
547
// Return the first attribute whose name equals 'name', ignoring case, or
// NULL if this tag has no such attribute.
wxSimpleHtmlAttribute* wxSimpleHtmlTag::FindAttribute(const wxString& name) const
{
    for (wxSimpleHtmlAttribute* candidate = m_attributes; candidate; candidate = candidate->m_next)
    {
        if (candidate->GetName().CmpNoCase(name) == 0)
            return candidate;
    }
    return NULL;
}
560
 
561
void wxSimpleHtmlTag::AppendAttribute(const wxString& name, const wxString& value)
562
{
563
    wxSimpleHtmlAttribute* attr = new wxSimpleHtmlAttribute(name, value);
564
    if (m_attributes)
565
    {
566
        // Find tail
567
        wxSimpleHtmlAttribute* last = m_attributes;
568
        while (last->m_next)
569
            last = last->m_next;
570
 
571
        last->m_next = attr;
572
    }
573
    else
574
        m_attributes = attr;
575
}
576
 
577
void wxSimpleHtmlTag::ClearChildren()
578
{
579
    if (m_children)
580
    {
581
        wxSimpleHtmlTag* child = m_children;
582
        while (child)
583
        {
584
            wxSimpleHtmlTag* next = child->m_next;
585
 
586
            child->m_next = NULL;
587
            delete child;
588
            child = next;
589
        }
590
        m_children = NULL;
591
    }
592
}
593
 
594
// Add 'tag' to the end of this tag's list of children and record this tag
// as its parent. The child is deleted by ClearChildren(). A NULL 'tag' is
// ignored.
void wxSimpleHtmlTag::AppendTag(wxSimpleHtmlTag* tag)
{
    if (!tag)
        return;

    // Set the parent for every child, including the very first one (it was
    // previously only set when the tag already had children).
    tag->m_parent = this;

    if (!m_children)
    {
        m_children = tag;
        return;
    }

    // Walk to the last child and link the new one after it
    wxSimpleHtmlTag* last = m_children;
    while (last->m_next)
        last = last->m_next;

    last->m_next = tag;
}
609
 
610
// Gets the text from this tag and its descendants
611
wxString wxSimpleHtmlTag::GetTagText()
612
{
613
    wxString text;
614
    if (m_children)
615
    {
616
        wxSimpleHtmlTag* tag = m_children;
617
        while (tag)
618
        {
619
            text += tag->GetTagText();
620
            tag = tag->m_next;
621
        }
622
        return text;
623
    }
624
    else if (GetType() == wxSimpleHtmlTag_Text)
625
        return GetText();
626
    else
627
        return wxEmptyString;
628
}
629
 
630
int wxSimpleHtmlTag::GetAttributeCount() const
631
{
632
    int count = 0;
633
    wxSimpleHtmlAttribute* attr = m_attributes;
634
    while (attr)
635
    {
636
        count ++;
637
        attr = attr->m_next;
638
    }
639
    return count;
640
}
641
 
642
// Return the attribute at zero-based index 'i', or NULL if 'i' is out of
// range. The list is walked from the start on every call.
wxSimpleHtmlAttribute* wxSimpleHtmlTag::GetAttribute(int i) const
{
    int index = 0;
    for (wxSimpleHtmlAttribute* attr = m_attributes; attr; attr = attr->m_next, index ++)
    {
        if (index == i)
            return attr;
    }
    return NULL;
}
655
 
656
int wxSimpleHtmlTag::GetChildCount() const
657
{
658
    int count = 0;
659
    wxSimpleHtmlTag* tag = m_children;
660
    while (tag)
661
    {
662
        count ++;
663
        tag = tag->m_next;
664
    }
665
    return count;
666
}
667
 
668
bool wxSimpleHtmlTag::HasAttribute(const wxString& name, const wxString& value) const
669
{
670
    wxSimpleHtmlAttribute* attr = FindAttribute(name);
671
 
672
    return (attr && (attr->GetValue().CmpNoCase(value) == 0)) ;
673
}
674
 
675
bool wxSimpleHtmlTag::HasAttribute(const wxString& name) const
676
{
677
    return FindAttribute(name) != NULL ;
678
}
679
 
680
// Look up the attribute called 'attrName' and copy its value into 'value'.
// Returns FALSE, leaving 'value' untouched, if there is no such attribute.
bool wxSimpleHtmlTag::GetAttributeValue(wxString& value, const wxString& attrName)
{
    wxSimpleHtmlAttribute* found = FindAttribute(attrName);
    if (!found)
        return FALSE;

    value = found->GetValue();
    return TRUE;
}
691
 
692
// Search forward from this tag until we find a tag with this name & attribute 
693
wxSimpleHtmlTag* wxSimpleHtmlTag::FindTag(const wxString& tagName, const wxString& attrName)
694
{
695
    wxSimpleHtmlTag* tag = m_next;
696
    while (tag)
697
    {
698
        if (tag->NameIs(tagName) && tag->FindAttribute(attrName))
699
            return tag;
700
 
701
        tag = tag->m_next;
702
    }
703
    return NULL;
704
}
705
 
706
// Starting with this tag and moving through the following siblings, append
// the text of every text tag to 'text' until a closing tag named 'tagName'
// is reached.
//
// NOTE(review): TRUE is returned both when the closing tag is found and when
// the end of the sibling list is reached without finding it.
bool wxSimpleHtmlTag::FindTextUntilTagClose(wxString& text, const wxString& tagName)
{
    for (wxSimpleHtmlTag* current = this; current; current = current->m_next)
    {
        const bool isClose = (current->GetType() == wxSimpleHtmlTag_Close);
        if (isClose && current->NameIs(tagName))
            return TRUE;

        if (current->GetType() == wxSimpleHtmlTag_Text)
            text += current->GetText();
    }
    return TRUE;
}
721
 
722
 
723
// Return the direct child at zero-based index 'i', or NULL if 'i' is out of
// range. The list is walked from the start on every call.
wxSimpleHtmlTag* wxSimpleHtmlTag::GetChild(int i) const
{
    int index = 0;
    for (wxSimpleHtmlTag* child = m_children; child; child = child->m_next, index ++)
    {
        if (index == i)
            return child;
    }
    return NULL;
}
737
 
738
// Write this tag, followed by all of its children, to 'stream'.
//
//   text tag   - its text, verbatim
//   open tag   - "<name attr attr ...>" and a newline
//   directive  - "<!name attr attr ...>" and a newline (the space after the
//                name is written even when there are no attributes)
//   close tag  - "</name>" and a newline
//   other      - nothing for the tag itself (e.g. the top-level tag)
void wxSimpleHtmlTag::Write(wxOutputStream& stream)
{
    const int type = GetType();

    if (type == wxSimpleHtmlTag_Text)
    {
        stream << m_text;
    }
    else if (type == wxSimpleHtmlTag_Open)
    {
        stream << "<" << m_name;
        if (m_attributes)
            stream << " ";

        // Attributes are separated by single spaces, none after the last
        for (wxSimpleHtmlAttribute* attr = m_attributes; attr; attr = attr->m_next)
        {
            attr->Write(stream);
            if (attr->m_next)
                stream << " ";
        }
        stream << ">\n";
    }
    else if (type == wxSimpleHtmlTag_Directive)
    {
        stream << "<!" << m_name << " ";

        // Attributes are separated by single spaces, none after the last
        for (wxSimpleHtmlAttribute* attr = m_attributes; attr; attr = attr->m_next)
        {
            attr->Write(stream);
            if (attr->m_next)
                stream << " ";
        }
        stream << ">\n";
    }
    else if (type == wxSimpleHtmlTag_Close)
    {
        stream << "</" << m_name << ">\n";
    }

    // Children are written after the tag itself, in list order
    for (wxSimpleHtmlTag* child = m_children; child; child = child->m_next)
        child->Write(stream);
}
795
 
796
// Write this attribute to 'stream': just the name when the value is empty,
// otherwise name="value". The value is written as stored, with no escaping.
void wxSimpleHtmlAttribute::Write(wxOutputStream& stream)
{
    stream << m_name;
    if (m_value.IsEmpty())
        return;

    stream << "=\"";
    stream << m_value;
    stream << "\"";
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.