1 |
29 |
eightycc |
//////////////////////////////////////////////////////////////////////////////////
|
2 |
|
|
// IBM 650 Reconstruction in Verilog (i650)
|
3 |
|
|
//
|
4 |
|
|
// This file is part of the IBM 650 Reconstruction in Verilog (i650) project
|
5 |
|
|
// http://www.opencores.org/project,i650
|
6 |
|
|
//
|
7 |
|
|
// Description: An implementation of SOAP 2 for the IBM 650.
|
8 |
|
|
//
|
9 |
|
|
// Additional Comments:
|
10 |
|
|
//
|
11 |
|
|
// Code translation tables for character codes used by early IBM data processing
|
12 |
|
|
// machines. An ibm_codec object translates codes for a specific codeset between
|
13 |
|
|
// Unicode or ASCII and a number of machine-specific codes.
|
14 |
|
|
//
|
15 |
|
|
// Hollerith code is a 12-bit code representing a column on an IBM punched card.
|
16 |
|
|
// The 12-bit column is split into zones and digits, a zero punch being both a
|
17 |
|
|
// zone and a digit. Zone punches are 12, 11, zero, and none, while digit
|
18 |
|
|
// punches are zero, 1, 2, ... 8, and 9. If all 12 rows of a card column are
|
19 |
|
|
// utilized, there are 4096 possible codes per column. In practice, codes
|
20 |
|
|
// were restricted to a single zone punch combined with one or two digit punches.
|
21 |
|
|
// For codes where two digit punches are used, one of those punches will be an 8.
|
22 |
|
|
// The zero punch may act as either a zone or digit: It is considered to be a
|
23 |
|
|
// digit when it appears alone or combined with an 11 or 12 punch, otherwise it acts
|
24 |
|
|
// as a zone punch.
|
25 |
|
|
//
|
26 |
|
|
// Binary Coded Decimal (BCD) is a 6-bit code made up of a 2-bit zone code and a
|
27 |
|
|
// 4-bit digit code, providing up to 64 unique codes. Tape (the usual destination
|
28 |
|
|
// and source for BCD data) adds an additional parity bit. The bits comprising
|
29 |
|
|
// BCD characters were recorded on 1/2" magnetic tape in 7 parallel tracks.
|
30 |
|
|
//
|
31 |
|
|
// Due to its method of operation, a BCD character on tape may not consist of
|
32 |
|
|
// all zero bits. Automatic conversion is provided by the tape hardware between
|
33 |
|
|
// an even parity BCD zero (000) and a BCD 'substitute blank' (0020) character
|
34 |
|
|
// when writing to tape. Likewise, a substitute blank is converted to a BCD zero
|
35 |
|
|
// when reading a tape in even parity mode. Tapes written in odd parity mode
|
36 |
|
|
// suffer no such limitations because an odd parity BCD zero (0100) already has
|
37 |
|
|
// a bit set.
|
38 |
|
|
//
|
39 |
|
|
// Present in the BCD character set are codes that have special meaning to various
|
40 |
|
|
// hardware devices. Generally called marks, they serve to delineate character
|
41 |
|
|
// data in various ways. As used by IBM tape systems, the 'tape mark' character is
|
42 |
|
|
// used to delineate a file on tape. Tape hardware may search independently of the
|
43 |
|
|
// CPU for tape marks, accelerating certain types of tape processing.
|
44 |
|
|
//
|
45 |
|
|
// In even parity mode, the 704 tape hardware automatically modifies the BCD zone
|
46 |
|
|
// bits. This translation preserves the BCD collating sequence when character
|
47 |
|
|
// comparisons are performed by binary magnitude comparison.
|
48 |
|
|
//
|
49 |
|
|
// Collating sequence is an important property of any character set. An
|
50 |
|
|
// examination of BCD codes shows that simply sorting by the binary magnitude
|
51 |
|
|
// of the codes will not yield a useful collating sequence. Hardware or
|
52 |
|
|
// software methods are needed to sort this character set. The 701/704/709/709x
|
53 |
|
|
// family of machines translated BCD on its way to and from tape when operating
|
54 |
|
|
// in the even parity mode. Translation consisted of reassigning zones so that
|
55 |
|
|
// BCD alphabetic characters sorted naturally, but did not help with special
|
56 |
|
|
// characters. The 14xx machines incorporated complex hardware logic to compare
|
57 |
|
|
// BCD codes directly, resulting in what we consider to be the authoritative BCD
|
58 |
|
|
// collating sequence. See fig. 64 in A24-3116-0, "System Operation Reference
|
59 |
|
|
// Manual, IBM 1440 Data Processing System". The 1401 BCD compare logic can
|
60 |
|
|
// be found in system diagrams 44.30.11.2, 44.31.11.2, 44.32.11.2, 44.33.11.2,
|
61 |
|
|
// 44.34.11.2, 44.34.21.2, and 44.34.31.2.
|
62 |
|
|
//
|
63 |
|
|
// Early machines used modified card machines (ex. 407 for 704 printer) for unit
|
64 |
|
|
// record I/O, and so were limited to the 48 character BCD codeset supported by
|
65 |
|
|
// these machines.
|
66 |
|
|
//
|
67 |
|
|
//
|
68 |
|
|
// Copyright (c) 2015 Robert Abeles
|
69 |
|
|
//
|
70 |
|
|
// This source file is free software; you can redistribute it
|
71 |
|
|
// and/or modify it under the terms of the GNU Lesser General
|
72 |
|
|
// Public License as published by the Free Software Foundation;
|
73 |
|
|
// either version 2.1 of the License, or (at your option) any
|
74 |
|
|
// later version.
|
75 |
|
|
//
|
76 |
|
|
// This source is distributed in the hope that it will be
|
77 |
|
|
// useful, but WITHOUT ANY WARRANTY; without even the implied
|
78 |
|
|
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
79 |
|
|
// PURPOSE. See the GNU Lesser General Public License for more
|
80 |
|
|
// details.
|
81 |
|
|
//
|
82 |
|
|
// You should have received a copy of the GNU Lesser General
|
83 |
|
|
// Public License along with this source; if not, download it
|
84 |
|
|
// from http://www.opencores.org/lgpl.shtml
|
85 |
|
|
//////////////////////////////////////////////////////////////////////////////////
|
86 |
|
|
|
87 |
|
|
#ifndef __ibm650_soap2__ibm_codec__
|
88 |
|
|
#define __ibm650_soap2__ibm_codec__
|
89 |
|
|
|
90 |
|
|
#include <stdint.h>
|
91 |
|
|
#include <vector>
|
92 |
|
|
|
93 |
|
|
// -------------------------------------------------------------------------------
|
94 |
|
|
// Selects the character code set. Selections may be combined, for example to
|
95 |
|
|
// select the FORTRAN 48 character BCD set, specify cs_bcd48+cs_bcd48_f.
|
96 |
|
|
// -------------------------------------------------------------------------------
|
97 |
|
|
// Code set selector flags. Every enumerator occupies a distinct bit, so
// selections can be summed or OR-ed together (e.g. cs_bcd48 + cs_bcd48_f).
enum ibm_codeset {
    cs_bcd48   = 0x00000001,  // 36 letters and digits BCD codeset
    cs_bcd48_a = 0x00000002,  // commercial 48 character BCD codeset
    cs_bcd48_f = 0x00000004,  // FORTRAN 48 character BCD codeset
    cs_bcd48_h = 0x00000008,  // scientific 48 character BCD codeset
    cs_bcd64   = 0x00000010   // 64 character BCD codeset
};
|
104 |
|
|
|
105 |
|
|
// -------------------------------------------------------------------------------
|
106 |
|
|
// Binary Hollerith punch codes.
|
107 |
|
|
// -------------------------------------------------------------------------------
|
108 |
|
|
// One bit per card row of a 12-bit Hollerith column image. The 12 row is the
// most significant bit (bit 11); the 9 row is the least significant (bit 0).
// A column with several punches is the bitwise OR of the individual values.
enum {
    holl_12_punch = (1 << 11),
    holl_11_punch = (1 << 10),
    holl_0_punch  = (1 << 9),
    holl_1_punch  = (1 << 8),
    holl_2_punch  = (1 << 7),
    holl_3_punch  = (1 << 6),
    holl_4_punch  = (1 << 5),
    holl_5_punch  = (1 << 4),
    holl_6_punch  = (1 << 3),
    holl_7_punch  = (1 << 2),
    holl_8_punch  = (1 << 1),
    holl_9_punch  = (1 << 0)
};
|
122 |
|
|
|
123 |
|
|
// -------------------------------------------------------------------------------
|
124 |
|
|
// Class ibm_codec.
|
125 |
|
|
// -------------------------------------------------------------------------------
|
126 |
|
|
// Table-driven translator between Hollerith card codes, IBM 650 two-digit
// character codes, Unicode, ASCII and key codes.
//
// All lookups index a translation table with a clamped argument, so an
// out-of-range argument never produces an out-of-range index value. The
// valid_* predicates report true when the selected table entry is
// non-negative. The tables themselves are filled by setup_tables(), which is
// defined outside this header.
class ibm_codec {
    // Translation tables, (re)built by setup_tables().
    // NOTE(review): table sizes are established in setup_tables(); the
    // accessors below assume each table covers the full range produced by the
    // clamp_* helper used to index it -- confirm against the implementation.
    std::vector<int8_t>  hollerith_to_code650_;
    std::vector<int16_t> code650_to_hollerith_;
    std::vector<int32_t> hollerith_to_unicode_;
    std::vector<int16_t> unicode_to_hollerith_;
    std::vector<int32_t> keycode_to_unicode_;
    std::vector<int16_t> hollerith_to_ascii_;
    std::vector<int16_t> ascii_to_hollerith_;
    std::vector<int8_t>  ascii_to_code650_;
    std::vector<int16_t> code650_to_ascii_;

    // Builds the translation tables for the given code set selection.
    void setup_tables(int);

public:
    // Saturates v into the 650 character code range 0..99.
    static inline int clamp_650(int v) {
        return (v < 0) ? 0 : (v > 99) ? 99 : v;
    }
    // Saturates v into the 12-bit Hollerith range 0..4095.
    static inline int clamp_hollerith(int v) {
        return (v < 0) ? 0 : (v > 4095) ? 4095 : v;
    }
    // Saturates v into the 16-bit range 0..65535.
    static inline int clamp_unicode(int v) {
        return (v < 0) ? 0 : (v > 65535) ? 65535 : v;
    }
    // Passes 0..255 through unchanged. Unlike the saturating helpers above,
    // any value outside that range (negative or above 255) maps to 0.
    static inline int clamp_keycode(int v) {
        return (v < 0) ? 0 : (v > 255) ? 0 : v;
    }
    // Passes 0..255 through unchanged. Unlike the saturating helpers above,
    // any value outside that range (negative or above 255) maps to 0.
    static inline int clamp_ascii(int v) {
        return (v < 0) ? 0 : (v > 255) ? 0 : v;
    }

    // Constructs a codec for code set selection c (see setup_tables).
    ibm_codec(int c) { setup_tables(c); }
    // Rebuilds every table for code set selection c.
    void change_codeset(int c) { setup_tables(c); }

    // True when 650 code c has a non-negative Hollerith table entry.
    inline bool valid_hollerith_for_650(int c) const {
        return code650_to_hollerith_[clamp_650(c)] >= 0;
    }

    // Hollerith -> Unicode; the table entry is clamped to 0..65535, so a
    // negative entry is returned as 0.
    inline uint16_t hollerith_to_unicode(int c) const {
        return clamp_unicode(hollerith_to_unicode_[clamp_hollerith(c)]);
    }
    // True when Hollerith code c has a non-negative Unicode table entry.
    inline bool valid_unicode_for_hollerith(int c) const {
        return hollerith_to_unicode_[clamp_hollerith(c)] >= 0;
    }

    // Unicode -> Hollerith; the table entry is clamped to 0..4095, so a
    // negative entry is returned as 0.
    inline uint16_t unicode_to_hollerith(int c) const {
        return clamp_hollerith(unicode_to_hollerith_[clamp_unicode(c)]);
    }
    // True when Unicode value c has a non-negative Hollerith table entry.
    inline bool valid_hollerith_for_unicode(int c) const {
        return unicode_to_hollerith_[clamp_unicode(c)] >= 0;
    }

    // Key code -> Unicode.
    // NOTE(review): unlike the other translators the table entry is returned
    // without clamp_unicode(), so a negative entry is converted to uint16_t
    // modulo 65536 (e.g. -1 becomes 0xFFFF) instead of 0. Check
    // valid_unicode_for_keycode() first; confirm whether this is intended.
    inline uint16_t keycode_to_unicode(int c) const {
        return keycode_to_unicode_[clamp_keycode(c)];
    }
    // True when key code c has a non-negative Unicode table entry.
    inline bool valid_unicode_for_keycode(int c) const {
        return keycode_to_unicode_[clamp_keycode(c)] >= 0;
    }

    // Hollerith -> ASCII; a negative table entry is returned as 0.
    // NOTE(review): the result is clamped with clamp_unicode(), not
    // clamp_ascii(), so entries above 255 pass through unchanged -- confirm
    // whether that is intended.
    inline uint16_t hollerith_to_ascii(int c) const {
        return clamp_unicode(hollerith_to_ascii_[clamp_hollerith(c)]);
    }

    // ASCII -> Hollerith; the table entry is clamped to 0..4095, so a
    // negative entry is returned as 0.
    inline uint16_t ascii_to_hollerith(int c) const {
        return clamp_hollerith(ascii_to_hollerith_[clamp_ascii(c)]);
    }
    // True when ASCII value c has a non-negative Hollerith table entry.
    inline bool valid_hollerith_for_ascii(int c) const {
        return ascii_to_hollerith_[clamp_ascii(c)] >= 0;
    }

    // ASCII -> 650 code; the table entry is clamped to 0..99, so a negative
    // entry is returned as 0.
    inline uint8_t ascii_to_code650(int c) const {
        return clamp_650(ascii_to_code650_[clamp_ascii(c)]);
    }
    // True when ASCII value c has a non-negative 650 code table entry.
    inline bool valid_code650_for_ascii(int c) const {
        return ascii_to_code650_[clamp_ascii(c)] >= 0;
    }

    // 650 code -> ASCII; the table entry goes through clamp_ascii(), so a
    // negative entry or one above 255 is returned as 0.
    inline uint16_t code650_to_ascii(int c) const {
        return clamp_ascii(code650_to_ascii_[clamp_650(c)]);
    }
    // True when 650 code c has a non-negative ASCII table entry.
    inline bool valid_ascii_for_code650(int c) const {
        return code650_to_ascii_[clamp_650(c)] >= 0;
    }
};
|
200 |
|
|
|
201 |
|
|
#endif /* defined(__ibm650_soap2__ibm_codec__) */
|