OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [rtos/] [ecos-3.0/] [host/] [tools/] [configtool/] [standalone/] [wxwin/] [htmlparser.cpp] - Blame information for rev 790

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 786 skrzyp
// ####ECOSHOSTGPLCOPYRIGHTBEGIN####                                        
2
// -------------------------------------------                              
3
// This file is part of the eCos host tools.                                
4
// Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.            
5
//
6
// This program is free software; you can redistribute it and/or modify     
7
// it under the terms of the GNU General Public License as published by     
8
// the Free Software Foundation; either version 2 or (at your option) any   
9
// later version.                                                           
10
//
11
// This program is distributed in the hope that it will be useful, but      
12
// WITHOUT ANY WARRANTY; without even the implied warranty of               
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU        
14
// General Public License for more details.                                 
15
//
16
// You should have received a copy of the GNU General Public License        
17
// along with this program; if not, write to the                            
18
// Free Software Foundation, Inc., 51 Franklin Street,                      
19
// Fifth Floor, Boston, MA  02110-1301, USA.                                
20
// -------------------------------------------                              
21
// ####ECOSHOSTGPLCOPYRIGHTEND####                                          
22
// htmlparser.cpp :
23
//
24
//===========================================================================
25
//#####DESCRIPTIONBEGIN####
26
//
27
// Author(s):   julians
28
// Contact(s):  julians
29
// Date:        2001/04/04
30
// Version:     $Id: htmlparser.cpp,v 1.6 2001/04/12 10:02:22 julians Exp $
31
// Purpose:
32
// Description: HTML parser/HTML Help file generator
33
// Requires:
34
// Provides:
35
// See also:
36
// Known bugs:
37
// Usage:
38
//
39
//####DESCRIPTIONEND####
40
//
41
//===========================================================================
42
 
43
// ============================================================================
44
// declarations
45
// ============================================================================
46
 
47
// ----------------------------------------------------------------------------
48
// headers
49
// ----------------------------------------------------------------------------
50
#ifdef __GNUG__
51
#pragma implementation "htmlparser.h"
52
#endif
53
 
54
// Includes other headers for precompiled compilation
55
#include "ecpch.h"
56
 
57
#ifdef __BORLANDC__
58
#pragma hdrstop
59
#endif
60
 
61
#include "wx/textfile.h"
62
#include "wx/wfstream.h"
63
 
64
#include "ecutils.h"
65
#include "htmlparser.h"
66
 
67
/*
68
 * wxSimpleHtmlParser
69
 * Parser that turns HTML text into a flat list of tags and text chunks
70
 */
71
 
72
// Constructs an empty parser: no parsed tag list, read position at the start
// and a zero-length input.
wxSimpleHtmlParser::wxSimpleHtmlParser()
{
    m_topLevel = NULL;
    m_pos = 0;
    // GetChar() bounds-checks against m_length, so it must hold a defined
    // value even before the first ParseString() call.
    m_length = 0;
}
77
 
78
 
79
// Destructor: releases the parsed tags and resets the parser state via Clear().
wxSimpleHtmlParser::~wxSimpleHtmlParser()
{
    this->Clear();
}
83
 
84
bool wxSimpleHtmlParser::ParseFile(const wxString& filename)
85
{
86
    wxTextFile textFile;
87
 
88
    if (textFile.Open(filename))
89
    {
90
        wxString text;
91
        wxString line;
92
        int i;
93
        int count = textFile.GetLineCount();
94
        for (i = 0; i < count; i++)
95
        {
96
            if (i == 0)
97
                line = textFile.GetFirstLine();
98
            else
99
                line = textFile.GetNextLine();
100
 
101
            text += line;
102
            if (i != (count - 1))
103
                text += wxT("\n");
104
        }
105
 
106
#if 0
107
        for ( line = textFile.GetFirstLine(); !textFile.Eof(); line = textFile.GetNextLine() )
108
        {
109
            text += line;
110
            if (!textFile.Eof())
111
                text += wxT("\n");
112
        }
113
#endif
114
 
115
        return ParseString(text);
116
    }
117
    else
118
        return FALSE;
119
}
120
 
121
bool wxSimpleHtmlParser::ParseString(const wxString& str)
122
{
123
    Clear();
124
 
125
    m_pos = 0;
126
    m_text = str;
127
    m_length = str.Length();
128
 
129
    m_topLevel = new wxSimpleHtmlTag(wxT("TOPLEVEL"), wxSimpleHtmlTag_TopLevel);
130
 
131
    return ParseHtml(m_topLevel);
132
}
133
 
134
// Main recursive parsing function
135
bool wxSimpleHtmlParser::ParseHtml(wxSimpleHtmlTag* parent)
136
{
137
    while (!Eof())
138
    {
139
        EatWhitespace();
140
        if (IsComment())
141
        {
142
            ParseComment();
143
        }
144
        else if (IsDirective())
145
        {
146
            wxSimpleHtmlTag* tag = ParseDirective();
147
            if (tag)
148
                parent->AppendTag(tag);
149
        }
150
        else if (IsTagClose())
151
        {
152
            wxSimpleHtmlTag* tag = ParseTagClose();
153
            if (tag)
154
                parent->AppendTag(tag);
155
        }
156
        else if (IsTagStartBracket(GetChar(m_pos)))
157
        {
158
            wxSimpleHtmlTag* tag = ParseTagHeader();
159
            if (tag)
160
                parent->AppendTag(tag);
161
        }
162
        else
163
        {
164
            // Just a text string
165
            wxString text;
166
            ParseText(text);
167
 
168
            wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(wxT("TEXT"), wxSimpleHtmlTag_Text);
169
            tag->SetText(text);
170
            parent->AppendTag(tag);
171
        }
172
    }
173
    return TRUE;
174
}
175
 
176
// Plain text, up until an angled bracket
177
bool wxSimpleHtmlParser::ParseText(wxString& text)
178
{
179
    while (!Eof() && GetChar(m_pos) != wxT('<'))
180
    {
181
        text += GetChar(m_pos);
182
        m_pos ++;
183
    }
184
    return TRUE;
185
}
186
 
187
// Parses an opening tag ("<name attr=value ...>") at the current position.
// Returns a newly allocated wxSimpleHtmlTag_Open tag holding the name and
// attributes, or NULL if the current character is not '<'.
wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagHeader()
{
    if (!IsTagStartBracket(GetChar(m_pos)))
        return NULL;

    // Step over the '<'
    m_pos ++;
    EatWhitespace();

    wxString tagName;
    ReadWord(tagName, TRUE);
    EatWhitespace();

    wxSimpleHtmlTag* openTag = new wxSimpleHtmlTag(tagName, wxSimpleHtmlTag_Open);
    ParseAttributes(openTag);
    EatWhitespace();

    // Consume the closing '>' when there is one
    if (IsTagEndBracket(GetChar(m_pos)))
        m_pos ++;

    return openTag;
}
213
 
214
// Parses a closing tag ("</name>") at the current position and returns a
// newly allocated wxSimpleHtmlTag_Close tag carrying the tag name. The name
// is empty when no word follows the "</".
wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagClose()
{
    Matches(wxT("</"), TRUE);
    EatWhitespace();

    wxString word;
    ReadWord(word, TRUE);
    EatWhitespace();

    // Only step over an actual '>', as ParseTagHeader() and ParseDirective()
    // do. Advancing unconditionally would swallow an unrelated character
    // and could push the read position beyond the end of the input.
    if (IsTagEndBracket(GetChar(m_pos)))
        m_pos ++;

    return new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Close);
}
229
 
230
// Reads the attributes of a tag header and appends them to 'tag', stopping
// at '>' (left unconsumed) or at the end of the input. Three forms are
// recognised:
//   - a quoted string, stored as a name with an empty value;
//   - a number, stored as a name with an empty value;
//   - a literal name, optionally followed by '=' and a quoted or literal value.
// Always returns TRUE.
bool wxSimpleHtmlParser::ParseAttributes(wxSimpleHtmlTag* tag)
{
    while (!(IsTagEndBracket(GetChar(m_pos)) || Eof()))
    {
        EatWhitespace();

        wxString name;

        if (IsString())
        {
            ReadString(name, TRUE);
            tag->AppendAttribute(name, wxEmptyString);
            continue;
        }

        if (IsNumeric(GetChar(m_pos)))
        {
            ReadNumber(name, TRUE);
            tag->AppendAttribute(name, wxEmptyString);
            continue;
        }

        // A name/value pair, or just a name without a value
        wxString value;
        ReadLiteral(name, TRUE);
        EatWhitespace();

        if (GetChar(m_pos) == wxT('='))
        {
            m_pos ++;
            EatWhitespace();

            if (IsString())
                ReadString(value, TRUE);
            else if (!Eof() && !IsTagEndBracket(GetChar(m_pos)))
                ReadLiteral(value, TRUE);
        }

        // Nothing is stored when no name could be read
        if (!name.IsEmpty())
            tag->AppendAttribute(name, value);
    }
    return TRUE;
}
272
 
273
// e.g. <!DOCTYPE ....>
274
wxSimpleHtmlTag* wxSimpleHtmlParser::ParseDirective()
275
{
276
    Matches(wxT("<!"), TRUE);
277
 
278
    EatWhitespace();
279
 
280
    wxString word;
281
    ReadWord(word, TRUE);
282
 
283
    EatWhitespace();
284
 
285
    wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Directive);
286
 
287
    ParseAttributes(tag);
288
 
289
    EatWhitespace();
290
 
291
    if (IsTagEndBracket(GetChar(m_pos)))
292
        m_pos ++;
293
 
294
    return tag;
295
}
296
 
297
bool wxSimpleHtmlParser::ParseComment()
298
{
299
    // Eat the comment tag start
300
    Matches(wxT("<!--"), TRUE);
301
 
302
    while (!Eof() && !Matches(wxT("-->"), TRUE))
303
    {
304
        m_pos ++;
305
    }
306
 
307
    return TRUE;
308
}
309
 
310
bool wxSimpleHtmlParser::EatWhitespace()
311
{
312
    while (!Eof() && IsWhitespace(GetChar(m_pos)))
313
        m_pos ++;
314
    return TRUE;
315
}
316
 
317
bool wxSimpleHtmlParser::EatWhitespace(int& pos)
318
{
319
    while (!Eof(pos) && IsWhitespace(GetChar(pos)))
320
        pos ++;
321
    return TRUE;
322
}
323
 
324
// Reads a double-quoted string at the current position and appends its
// contents (without the quotes) to 'str'. No escape sequences are
// interpreted. If 'eatIt' is TRUE the read position is moved past the
// string, including the closing quote when present.
// Returns FALSE, changing nothing, if the current character is not '"'.
bool wxSimpleHtmlParser::ReadString(wxString& str, bool eatIt)
{
    const int quote = (int) '"';

    int cursor = m_pos;
    if (GetChar(cursor) != quote)
        return FALSE;

    // Skip the opening quote, then copy up to the closing quote or the end
    // of the input.
    cursor ++;
    for (; !Eof(cursor) && GetChar(cursor) != quote; cursor ++)
        str += (wxChar) GetChar(cursor);

    // The closing quote is missing when the input ended inside the string
    if (GetChar(cursor) == quote)
        cursor ++;

    if (eatIt)
        m_pos = cursor;
    return TRUE;
}
345
 
346
// Reads a word at the current position and appends it to 'str'. A word is
// one alphabetic character followed by any number of characters accepted by
// IsWordChar(). If 'eatIt' is TRUE the read position is moved past the word.
// Returns FALSE, changing nothing, if the current character is not alphabetic.
bool wxSimpleHtmlParser::ReadWord(wxString& str, bool eatIt)
{
    int cursor = m_pos;

    if (!IsAlpha(GetChar(cursor)))
        return FALSE;

    // Leading alphabetic character
    str += (wxChar) GetChar(cursor);
    cursor ++;

    // Remaining word characters
    for (; !Eof(cursor) && IsWordChar(GetChar(cursor)); cursor ++)
        str += (wxChar) GetChar(cursor);

    if (eatIt)
        m_pos = cursor;
    return TRUE;
}
365
 
366
// Reads a run of characters accepted by IsNumeric() at the current position
// and appends it to 'str'. If 'eatIt' is TRUE the read position is moved
// past the run. Returns FALSE, changing nothing, if the current character
// is not accepted by IsNumeric().
bool wxSimpleHtmlParser::ReadNumber(wxString& str, bool eatIt)
{
    int cursor = m_pos;

    if (!IsNumeric(GetChar(cursor)))
        return FALSE;

    // First character is already known to be numeric
    str += (wxChar) GetChar(cursor);
    cursor ++;

    for (; !Eof(cursor) && IsNumeric(GetChar(cursor)); cursor ++)
        str += (wxChar) GetChar(cursor);

    if (eatIt)
        m_pos = cursor;
    return TRUE;
}
385
 
386
// Could be number, string, whatever, but read up until whitespace or end of tag (but not a quoted string)
387
bool wxSimpleHtmlParser::ReadLiteral(wxString& str, bool eatIt)
388
{
389
    int pos = m_pos;
390
 
391
    while (!Eof(pos) && !IsWhitespace(GetChar(pos)) && !IsTagEndBracket(GetChar(pos)) && GetChar(pos) != wxT('='))
392
    {
393
        str += GetChar(pos);
394
        pos ++;
395
    }
396
    if (eatIt)
397
        m_pos = pos;
398
    return TRUE;
399
}
400
 
401
bool wxSimpleHtmlParser::IsTagClose()
402
{
403
    return Matches(wxT("</"));
404
}
405
 
406
bool wxSimpleHtmlParser::IsComment()
407
{
408
    return Matches(wxT("<!--"));
409
}
410
 
411
bool wxSimpleHtmlParser::IsDirective()
412
{
413
    return Matches(wxT("<!"));
414
}
415
 
416
bool wxSimpleHtmlParser::IsString()
417
{
418
    return (GetChar(m_pos) == (int) '"') ;
419
}
420
 
421
bool wxSimpleHtmlParser::IsWord()
422
{
423
    return (IsAlpha(GetChar(m_pos)));
424
}
425
 
426
bool wxSimpleHtmlParser::IsTagStartBracket(int ch)
427
{
428
    return (ch == wxT('<'));
429
}
430
 
431
bool wxSimpleHtmlParser::IsTagEndBracket(int ch)
432
{
433
    return (ch == wxT('>'));
434
}
435
 
436
bool wxSimpleHtmlParser::IsWhitespace(int ch)
437
{
438
    return ((ch == 13) || (ch == 10) || (ch == 32) || (ch == (int) '\t')) ;
439
}
440
 
441
bool wxSimpleHtmlParser::IsAlpha(int ch)
442
{
443
    return (wxIsalpha((wxChar) ch) != 0);
444
}
445
 
446
bool wxSimpleHtmlParser::IsWordChar(int ch)
447
{
448
    return (wxIsalpha((wxChar) ch) != 0 || ch == wxT('-') || ch == wxT('_') || IsNumeric(ch));
449
}
450
 
451
bool wxSimpleHtmlParser::IsNumeric(int ch)
452
{
453
    return (wxIsdigit((wxChar) ch) != 0 || ch == wxT('-') || ch == wxT('.')) ;
454
}
455
 
456
// Matches this string (case insensitive)
457
bool wxSimpleHtmlParser::Matches(const wxString& tok, bool eatIt)
458
{
459
    wxString text(m_text.Mid(m_pos, tok.Length()));
460
    bool success = (text.CmpNoCase(tok) == 0) ;
461
    if (success && eatIt)
462
    {
463
        m_pos += tok.Length();
464
    }
465
    return success;
466
}
467
 
468
// Safe way of getting a character
469
int wxSimpleHtmlParser::GetChar(size_t i) const
470
{
471
    if (i >= m_length)
472
        return -1;
473
    return m_text[i];
474
}
475
 
476
void wxSimpleHtmlParser::Clear()
477
{
478
    if (m_topLevel)
479
        delete m_topLevel;
480
    m_topLevel = NULL;
481
    m_text = wxEmptyString;
482
    m_pos = 0;
483
    m_length = 0;
484
}
485
 
486
// Write this file
487
void wxSimpleHtmlParser::Write(wxOutputStream& stream)
488
{
489
    if (m_topLevel)
490
        m_topLevel->Write(stream);
491
}
492
 
493
// Writes the parsed document to the file 'filename'.
// Returns FALSE if the output stream could not be opened, TRUE otherwise.
bool wxSimpleHtmlParser::WriteFile(wxString& filename)
{
    wxFileOutputStream output(filename);
    if (!output.Ok())
        return FALSE;

    Write(output);
    return TRUE;
}
504
 
505
/*
506
 * wxSimpleHtmlTag
507
 * Representation of a tag or chunk of text
508
 */
509
 
510
// Creates a tag with the given name and type (one of the wxSimpleHtmlTag_*
// values). The new tag has no attributes, no children, no parent and no
// following sibling.
wxSimpleHtmlTag::wxSimpleHtmlTag(const wxString& tagName, int tagType)
{
    m_name = tagName;
    m_type = tagType;

    // Not linked into any list yet
    m_parent = NULL;
    m_next = NULL;
    m_children = NULL;
    m_attributes = NULL;
}
519
 
520
// Destructor: deletes this tag's attributes and all of its child tags.
wxSimpleHtmlTag::~wxSimpleHtmlTag()
{
    this->ClearAttributes();
    this->ClearChildren();
}
525
 
526
//// Operations
527
void wxSimpleHtmlTag::ClearAttributes()
528
{
529
    if (m_attributes)
530
    {
531
        wxSimpleHtmlAttribute* attr = m_attributes;
532
        while (attr)
533
        {
534
            wxSimpleHtmlAttribute* next = attr->m_next;
535
 
536
            attr->m_next = NULL;
537
            delete attr;
538
            attr = next;
539
        }
540
        m_attributes = NULL;
541
    }
542
}
543
 
544
// Returns the first attribute whose name equals 'name' (case-insensitive),
// or NULL if this tag has no such attribute.
wxSimpleHtmlAttribute* wxSimpleHtmlTag::FindAttribute(const wxString& name) const
{
    for (wxSimpleHtmlAttribute* candidate = m_attributes; candidate != NULL; candidate = candidate->m_next)
    {
        if (candidate->GetName().CmpNoCase(name) == 0)
            return candidate;
    }
    return NULL;
}
557
 
558
void wxSimpleHtmlTag::AppendAttribute(const wxString& name, const wxString& value)
559
{
560
    wxSimpleHtmlAttribute* attr = new wxSimpleHtmlAttribute(name, value);
561
    if (m_attributes)
562
    {
563
        // Find tail
564
        wxSimpleHtmlAttribute* last = m_attributes;
565
        while (last->m_next)
566
            last = last->m_next;
567
 
568
        last->m_next = attr;
569
    }
570
    else
571
        m_attributes = attr;
572
}
573
 
574
void wxSimpleHtmlTag::ClearChildren()
575
{
576
    if (m_children)
577
    {
578
        wxSimpleHtmlTag* child = m_children;
579
        while (child)
580
        {
581
            wxSimpleHtmlTag* next = child->m_next;
582
 
583
            child->m_next = NULL;
584
            delete child;
585
            child = next;
586
        }
587
        m_children = NULL;
588
    }
589
}
590
 
591
// Appends 'tag' to the end of this tag's child list and records this tag as
// its parent. A NULL 'tag' is ignored.
void wxSimpleHtmlTag::AppendTag(wxSimpleHtmlTag* tag)
{
    if (!tag)
        return;

    // Every child gets its parent set, including the first one appended to
    // an empty list.
    tag->m_parent = this;

    if (!m_children)
    {
        m_children = tag;
        return;
    }

    // Find tail
    wxSimpleHtmlTag* last = m_children;
    while (last->m_next)
        last = last->m_next;

    last->m_next = tag;
}
606
 
607
// Gets the text from this tag and its descendants
608
wxString wxSimpleHtmlTag::GetTagText()
609
{
610
    wxString text;
611
    if (m_children)
612
    {
613
        wxSimpleHtmlTag* tag = m_children;
614
        while (tag)
615
        {
616
            text += tag->GetTagText();
617
            tag = tag->m_next;
618
        }
619
        return text;
620
    }
621
    else if (GetType() == wxSimpleHtmlTag_Text)
622
        return GetText();
623
    else
624
        return wxEmptyString;
625
}
626
 
627
int wxSimpleHtmlTag::GetAttributeCount() const
628
{
629
    int count = 0;
630
    wxSimpleHtmlAttribute* attr = m_attributes;
631
    while (attr)
632
    {
633
        count ++;
634
        attr = attr->m_next;
635
    }
636
    return count;
637
}
638
 
639
// Returns the attribute at zero-based position 'i' in the attribute list,
// or NULL when 'i' is out of range.
wxSimpleHtmlAttribute* wxSimpleHtmlTag::GetAttribute(int i) const
{
    int index = 0;
    for (wxSimpleHtmlAttribute* node = m_attributes; node != NULL; node = node->m_next)
    {
        if (index == i)
            return node;
        index ++;
    }
    return NULL;
}
652
 
653
int wxSimpleHtmlTag::GetChildCount() const
654
{
655
    int count = 0;
656
    wxSimpleHtmlTag* tag = m_children;
657
    while (tag)
658
    {
659
        count ++;
660
        tag = tag->m_next;
661
    }
662
    return count;
663
}
664
 
665
bool wxSimpleHtmlTag::HasAttribute(const wxString& name, const wxString& value) const
666
{
667
    wxSimpleHtmlAttribute* attr = FindAttribute(name);
668
 
669
    return (attr && (attr->GetValue().CmpNoCase(value) == 0)) ;
670
}
671
 
672
bool wxSimpleHtmlTag::HasAttribute(const wxString& name) const
673
{
674
    return FindAttribute(name) != NULL ;
675
}
676
 
677
// Looks up the attribute 'attrName' and copies its value into 'value'.
// Returns TRUE if the attribute exists; otherwise returns FALSE and leaves
// 'value' untouched.
bool wxSimpleHtmlTag::GetAttributeValue(wxString& value, const wxString& attrName)
{
    wxSimpleHtmlAttribute* found = FindAttribute(attrName);
    if (!found)
        return FALSE;

    value = found->GetValue();
    return TRUE;
}
688
 
689
// Search forward from this tag until we find a tag with this name & attribute 
690
wxSimpleHtmlTag* wxSimpleHtmlTag::FindTag(const wxString& tagName, const wxString& attrName)
691
{
692
    wxSimpleHtmlTag* tag = m_next;
693
    while (tag)
694
    {
695
        if (tag->NameIs(tagName) && tag->FindAttribute(attrName))
696
            return tag;
697
 
698
        tag = tag->m_next;
699
    }
700
    return NULL;
701
}
702
 
703
// Starting with this tag and moving through its following siblings, appends
// the text of every text tag to 'text' until a closing tag named 'tagName'
// is met or the sibling list ends.
// NOTE: returns TRUE in both cases, so the return value does not tell the
// caller whether the closing tag was actually found.
bool wxSimpleHtmlTag::FindTextUntilTagClose(wxString& text, const wxString& tagName)
{
    for (wxSimpleHtmlTag* current = this; current != NULL; current = current->m_next)
    {
        const bool isWantedClose =
            (current->GetType() == wxSimpleHtmlTag_Close && current->NameIs(tagName));
        if (isWantedClose)
            return TRUE;

        if (current->GetType() == wxSimpleHtmlTag_Text)
            text += current->GetText();
    }
    return TRUE;
}
718
 
719
 
720
// Returns the direct child at zero-based position 'i' in the child list,
// or NULL when 'i' is out of range.
wxSimpleHtmlTag* wxSimpleHtmlTag::GetChild(int i) const
{
    int index = 0;
    for (wxSimpleHtmlTag* node = m_children; node != NULL; node = node->m_next)
    {
        if (index == i)
            return node;
        index ++;
    }
    return NULL;
}
734
 
735
// Writes this tag to 'stream', followed by all of its children in order.
//   - Text tags write their text verbatim.
//   - Open tags write "<name attr attr ...>" and a newline.
//   - Directives write "<!name attr attr ...>" and a newline; the space
//     after the name is written even when there are no attributes.
//   - Close tags write "</name>" and a newline.
//   - Any other type (e.g. the top-level container) writes nothing itself.
void wxSimpleHtmlTag::Write(wxOutputStream& stream)
{
    switch (GetType())
    {
    case wxSimpleHtmlTag_Text:
        {
            stream << m_text;
            break;
        }
    case wxSimpleHtmlTag_Open:
        {
            stream << "<" << m_name;
            if (m_attributes)
                stream << " ";
            // Walk the attribute list once rather than re-counting and
            // re-indexing it on every iteration.
            for (wxSimpleHtmlAttribute* attr = m_attributes; attr; attr = attr->m_next)
            {
                attr->Write(stream);
                // Single space between attributes, none after the last
                if (attr->m_next)
                    stream << " ";
            }
            stream << ">\n";
            break;
        }
    case wxSimpleHtmlTag_Directive:
        {
            stream << "<!" << m_name << " ";
            for (wxSimpleHtmlAttribute* attr = m_attributes; attr; attr = attr->m_next)
            {
                attr->Write(stream);
                if (attr->m_next)
                    stream << " ";
            }
            stream << ">\n";
            break;
        }
    case wxSimpleHtmlTag_Close:
        {
            stream << "</" << m_name << ">\n";
            break;
        }
    default:
        {
            break;
        }
    }

    // Children are written after the tag itself, in list order
    wxSimpleHtmlTag* tag = m_children;
    while (tag)
    {
        tag->Write(stream);
        tag = tag->m_next;
    }
}
792
 
793
// Writes this attribute to 'stream': just the name when the value is empty,
// otherwise name="value". The value is written as-is, without escaping.
void wxSimpleHtmlAttribute::Write(wxOutputStream& stream)
{
    stream << m_name;

    if (!m_value.IsEmpty())
    {
        stream << "=\"";
        stream << m_value;
        stream << "\"";
    }
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.