OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libffi/] [src/] [x86/] [unix64.S] - Blame information for rev 764

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 732 jeremybenn
/* -----------------------------------------------------------------------
2
   unix64.S - Copyright (c) 2002  Bo Thorsen 
3
              Copyright (c) 2008  Red Hat, Inc
4
 
5
   x86-64 Foreign Function Interface
6
 
7
   Permission is hereby granted, free of charge, to any person obtaining
8
   a copy of this software and associated documentation files (the
9
   ``Software''), to deal in the Software without restriction, including
10
   without limitation the rights to use, copy, modify, merge, publish,
11
   distribute, sublicense, and/or sell copies of the Software, and to
12
   permit persons to whom the Software is furnished to do so, subject to
13
   the following conditions:
14
 
15
   The above copyright notice and this permission notice shall be included
16
   in all copies or substantial portions of the Software.
17
 
18
   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
19
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25
   DEALINGS IN THE SOFTWARE.
26
   ----------------------------------------------------------------------- */
27
 
28
#ifdef __x86_64__
#define LIBFFI_ASM
/* NOTE(review): the scraped listing dropped the include file names;
   restored per upstream libffi — fficonfig.h must come first so the
   HAVE_AS_* feature macros are visible below.  */
#include <fficonfig.h>
#include <ffi.h>

.text
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
                    void *raddr, void (*fnaddr)(void));

   ABI: System V AMD64.
   In:  rdi = args (base of the prepared register/stack argument area)
        rsi = bytes (size of that area)
        rdx = flags (low byte = FFI_TYPE of the return value)
        rcx = raddr (where to store the return value)
        r8  = fnaddr (target function)
        r9  = number of SSE argument registers in use

   Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
   for this function.  This has been allocated by ffi_call.  We also
   deallocate some of the stack that has been alloca'd.  */

        .align  2
        .globl  ffi_call_unix64
        .type   ffi_call_unix64,@function

ffi_call_unix64:
.LUW0:
        movq    (%rsp), %r10            /* Load return address.  */
        leaq    (%rdi, %rsi), %rax      /* Find local stack base.  */
        movq    %rdx, (%rax)            /* Save flags.  */
        movq    %rcx, 8(%rax)           /* Save raddr.  */
        movq    %rbp, 16(%rax)          /* Save old frame pointer.  */
        movq    %r10, 24(%rax)          /* Relocate return address.  */
        movq    %rax, %rbp              /* Finalize local stack frame.  */
.LUW1:
        movq    %rdi, %r10              /* Save a copy of the register area. */
        movq    %r8, %r11               /* Save a copy of the target fn.  */
        movl    %r9d, %eax              /* Set number of SSE registers.  */

        /* Load up all argument registers.  */
        movq    (%r10), %rdi
        movq    8(%r10), %rsi
        movq    16(%r10), %rdx
        movq    24(%r10), %rcx
        movq    32(%r10), %r8
        movq    40(%r10), %r9
        testl   %eax, %eax
        jnz     .Lload_sse
.Lret_from_load_sse:

        /* Deallocate the reg arg area.  */
        leaq    176(%r10), %rsp

        /* Call the user function.  */
        call    *%r11

        /* Deallocate stack arg area; local stack frame in redzone.  */
        leaq    24(%rbp), %rsp

        movq    0(%rbp), %rcx           /* Reload flags.  */
        movq    8(%rbp), %rdi           /* Reload raddr.  */
        movq    16(%rbp), %rbp          /* Reload old frame pointer.  */
.LUW2:

        /* The first byte of the flags contains the FFI_TYPE.  Dispatch
           through a table of pc-relative 32-bit offsets (PIC-safe).  */
        movzbl  %cl, %r10d
        leaq    .Lstore_table(%rip), %r11
        movslq  (%r11, %r10, 4), %r10
        addq    %r11, %r10
        jmp     *%r10

.Lstore_table:
        .long   .Lst_void-.Lstore_table         /* FFI_TYPE_VOID */
        .long   .Lst_sint32-.Lstore_table       /* FFI_TYPE_INT */
        .long   .Lst_float-.Lstore_table        /* FFI_TYPE_FLOAT */
        .long   .Lst_double-.Lstore_table       /* FFI_TYPE_DOUBLE */
        .long   .Lst_ldouble-.Lstore_table      /* FFI_TYPE_LONGDOUBLE */
        .long   .Lst_uint8-.Lstore_table        /* FFI_TYPE_UINT8 */
        .long   .Lst_sint8-.Lstore_table        /* FFI_TYPE_SINT8 */
        .long   .Lst_uint16-.Lstore_table       /* FFI_TYPE_UINT16 */
        .long   .Lst_sint16-.Lstore_table       /* FFI_TYPE_SINT16 */
        .long   .Lst_uint32-.Lstore_table       /* FFI_TYPE_UINT32 */
        .long   .Lst_sint32-.Lstore_table       /* FFI_TYPE_SINT32 */
        .long   .Lst_int64-.Lstore_table        /* FFI_TYPE_UINT64 */
        .long   .Lst_int64-.Lstore_table        /* FFI_TYPE_SINT64 */
        .long   .Lst_struct-.Lstore_table       /* FFI_TYPE_STRUCT */
        .long   .Lst_int64-.Lstore_table        /* FFI_TYPE_POINTER */

        .align 2
.Lst_void:
        ret
        .align 2

.Lst_uint8:
        movzbq  %al, %rax
        movq    %rax, (%rdi)
        ret
        .align 2
.Lst_sint8:
        movsbq  %al, %rax
        movq    %rax, (%rdi)
        ret
        .align 2
.Lst_uint16:
        movzwq  %ax, %rax
        movq    %rax, (%rdi)
        ret                             /* BUGFIX: was missing, so UINT16 fell
                                           through into .Lst_sint16 and the
                                           zero-extended store was overwritten
                                           with a sign-extended one.  */
        .align 2
.Lst_sint16:
        movswq  %ax, %rax
        movq    %rax, (%rdi)
        ret
        .align 2
.Lst_uint32:
        movl    %eax, %eax              /* Zero-extend u32 -> u64.  */
        movq    %rax, (%rdi)
        ret                             /* BUGFIX: was missing, so UINT32 fell
                                           through into .Lst_sint32 (cltq) and
                                           was re-stored sign-extended.  */
        .align 2
.Lst_sint32:
        cltq
        movq    %rax, (%rdi)
        ret
        .align 2
.Lst_int64:
        movq    %rax, (%rdi)
        ret

        .align 2
.Lst_float:
        movss   %xmm0, (%rdi)
        ret
        .align 2
.Lst_double:
        movsd   %xmm0, (%rdi)
        ret
.Lst_ldouble:
        fstpt   (%rdi)
        ret

        .align 2
.Lst_struct:
        leaq    -20(%rsp), %rsi         /* Scratch area in redzone.  */

        /* We have to locate the values now, and since we don't want to
           write too much data into the user's return value, we spill the
           value to a 16 byte scratch area first.  Bits 8, 9, and 10
           control where the values are located.  Only one of the three
           bits will be set; see ffi_prep_cif_machdep for the pattern.  */
        movd    %xmm0, %r10
        movd    %xmm1, %r11
        testl   $0x100, %ecx
        cmovnz  %rax, %rdx
        cmovnz  %r10, %rax
        testl   $0x200, %ecx
        cmovnz  %r10, %rdx
        testl   $0x400, %ecx
        cmovnz  %r10, %rax
        cmovnz  %r11, %rdx
        movq    %rax, (%rsi)
        movq    %rdx, 8(%rsi)

        /* Bits 12-31 contain the true size of the structure.  Copy from
           the scratch area to the true destination.  */
        shrl    $12, %ecx
        rep movsb
        ret

        /* Many times we can avoid loading any SSE registers at all.
           It's not worth an indirect jump to load the exact set of
           SSE registers needed; zero or all is a good compromise.  */
        .align 2
.LUW3:
.Lload_sse:
        movdqa  48(%r10), %xmm0
        movdqa  64(%r10), %xmm1
        movdqa  80(%r10), %xmm2
        movdqa  96(%r10), %xmm3
        movdqa  112(%r10), %xmm4
        movdqa  128(%r10), %xmm5
        movdqa  144(%r10), %xmm6
        movdqa  160(%r10), %xmm7
        jmp     .Lret_from_load_sse

.LUW4:
        .size    ffi_call_unix64,.-ffi_call_unix64
        .align  2
206
        .globl ffi_closure_unix64
207
        .type   ffi_closure_unix64,@function
208
 
209
ffi_closure_unix64:
210
.LUW5:
211
        /* The carry flag is set by the trampoline iff SSE registers
212
           are used.  Don't clobber it before the branch instruction.  */
213
        leaq    -200(%rsp), %rsp
214
.LUW6:
215
        movq    %rdi, (%rsp)
216
        movq    %rsi, 8(%rsp)
217
        movq    %rdx, 16(%rsp)
218
        movq    %rcx, 24(%rsp)
219
        movq    %r8, 32(%rsp)
220
        movq    %r9, 40(%rsp)
221
        jc      .Lsave_sse
222
.Lret_from_save_sse:
223
 
224
        movq    %r10, %rdi
225
        leaq    176(%rsp), %rsi
226
        movq    %rsp, %rdx
227
        leaq    208(%rsp), %rcx
228
        call    ffi_closure_unix64_inner@PLT
229
 
230
        /* Deallocate stack frame early; return value is now in redzone.  */
231
        addq    $200, %rsp
232
.LUW7:
233
 
234
        /* The first byte of the return value contains the FFI_TYPE.  */
235
        movzbl  %al, %r10d
236
        leaq    .Lload_table(%rip), %r11
237
        movslq  (%r11, %r10, 4), %r10
238
        addq    %r11, %r10
239
        jmp     *%r10
240
 
241
.Lload_table:
242
        .long   .Lld_void-.Lload_table          /* FFI_TYPE_VOID */
243
        .long   .Lld_int32-.Lload_table         /* FFI_TYPE_INT */
244
        .long   .Lld_float-.Lload_table         /* FFI_TYPE_FLOAT */
245
        .long   .Lld_double-.Lload_table        /* FFI_TYPE_DOUBLE */
246
        .long   .Lld_ldouble-.Lload_table       /* FFI_TYPE_LONGDOUBLE */
247
        .long   .Lld_int8-.Lload_table          /* FFI_TYPE_UINT8 */
248
        .long   .Lld_int8-.Lload_table          /* FFI_TYPE_SINT8 */
249
        .long   .Lld_int16-.Lload_table         /* FFI_TYPE_UINT16 */
250
        .long   .Lld_int16-.Lload_table         /* FFI_TYPE_SINT16 */
251
        .long   .Lld_int32-.Lload_table         /* FFI_TYPE_UINT32 */
252
        .long   .Lld_int32-.Lload_table         /* FFI_TYPE_SINT32 */
253
        .long   .Lld_int64-.Lload_table         /* FFI_TYPE_UINT64 */
254
        .long   .Lld_int64-.Lload_table         /* FFI_TYPE_SINT64 */
255
        .long   .Lld_struct-.Lload_table        /* FFI_TYPE_STRUCT */
256
        .long   .Lld_int64-.Lload_table         /* FFI_TYPE_POINTER */
257
 
258
        .align 2
259
.Lld_void:
260
        ret
261
 
262
        .align 2
263
.Lld_int8:
264
        movzbl  -24(%rsp), %eax
265
        ret
266
        .align 2
267
.Lld_int16:
268
        movzwl  -24(%rsp), %eax
269
        ret
270
        .align 2
271
.Lld_int32:
272
        movl    -24(%rsp), %eax
273
        ret
274
        .align 2
275
.Lld_int64:
276
        movq    -24(%rsp), %rax
277
        ret
278
 
279
        .align 2
280
.Lld_float:
281
        movss   -24(%rsp), %xmm0
282
        ret
283
        .align 2
284
.Lld_double:
285
        movsd   -24(%rsp), %xmm0
286
        ret
287
        .align 2
288
.Lld_ldouble:
289
        fldt    -24(%rsp)
290
        ret
291
 
292
        .align 2
293
.Lld_struct:
294
        /* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
295
           %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
296
           both rdx and xmm1 with the second word.  For the remaining,
297
           bit 8 set means xmm0 gets the second word, and bit 9 means
298
           that rax gets the second word.  */
299
        movq    -24(%rsp), %rcx
300
        movq    -16(%rsp), %rdx
301
        movq    -16(%rsp), %xmm1
302
        testl   $0x100, %eax
303
        cmovnz  %rdx, %rcx
304
        movd    %rcx, %xmm0
305
        testl   $0x200, %eax
306
        movq    -24(%rsp), %rax
307
        cmovnz  %rdx, %rax
308
        ret
309
 
310
        /* See the comment above .Lload_sse; the same logic applies here.  */
311
        .align 2
312
.LUW8:
313
.Lsave_sse:
314
        movdqa  %xmm0, 48(%rsp)
315
        movdqa  %xmm1, 64(%rsp)
316
        movdqa  %xmm2, 80(%rsp)
317
        movdqa  %xmm3, 96(%rsp)
318
        movdqa  %xmm4, 112(%rsp)
319
        movdqa  %xmm5, 128(%rsp)
320
        movdqa  %xmm6, 144(%rsp)
321
        movdqa  %xmm7, 160(%rsp)
322
        jmp     .Lret_from_save_sse
323
 
324
.LUW9:
325
        .size   ffi_closure_unix64,.-ffi_closure_unix64
326
 
327
/* DWARF unwind information for the two functions above.  The CFI byte
   data below is hand-encoded and must stay in sync with the .LUW0-.LUW9
   labels; do not edit one without the other.  */
#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE
        .section        .eh_frame,"a",@unwind
#else
        .section        .eh_frame,"a",@progbits
#endif
.Lframe1:
        .long   .LECIE1-.LSCIE1         /* CIE Length */
.LSCIE1:
        .long   0                       /* CIE Identifier Tag */
        .byte   1                       /* CIE Version */
        .ascii "zR\0"                   /* CIE Augmentation */
        .uleb128 1                      /* CIE Code Alignment Factor */
        .sleb128 -8                     /* CIE Data Alignment Factor */
        .byte   0x10                    /* CIE RA Column */
        .uleb128 1                      /* Augmentation size */
        .byte   0x1b                    /* FDE Encoding (pcrel sdata4) */
        .byte   0xc                     /* DW_CFA_def_cfa, %rsp offset 8 */
        .uleb128 7
        .uleb128 8
        .byte   0x80+16                 /* DW_CFA_offset, %rip offset 1*-8 */
        .uleb128 1
        .align 8
.LECIE1:
.LSFDE1:
        .long   .LEFDE1-.LASFDE1        /* FDE Length */
.LASFDE1:
        .long   .LASFDE1-.Lframe1       /* FDE CIE offset */
#if HAVE_AS_X86_PCREL
        .long   .LUW0-.                 /* FDE initial location */
#else
        .long   .LUW0@rel
#endif
        .long   .LUW4-.LUW0             /* FDE address range */
        .uleb128 0x0                    /* Augmentation size */

        .byte   0x4                     /* DW_CFA_advance_loc4 */
        .long   .LUW1-.LUW0

        /* New stack frame based off rbp.  This is a itty bit of unwind
           trickery in that the CFA *has* changed.  There is no easy way
           to describe it correctly on entry to the function.  Fortunately,
           it doesn't matter too much since at all points we can correctly
           unwind back to ffi_call.  Note that the location to which we
           moved the return address is (the new) CFA-8, so from the
           perspective of the unwind info, it hasn't moved.  */
        .byte   0xc                     /* DW_CFA_def_cfa, %rbp offset 32 */
        .uleb128 6
        .uleb128 32
        .byte   0x80+6                  /* DW_CFA_offset, %rbp offset 2*-8 */
        .uleb128 2
        .byte   0xa                     /* DW_CFA_remember_state */

        .byte   0x4                     /* DW_CFA_advance_loc4 */
        .long   .LUW2-.LUW1
        .byte   0xc                     /* DW_CFA_def_cfa, %rsp offset 8 */
        .uleb128 7
        .uleb128 8
        .byte   0xc0+6                  /* DW_CFA_restore, %rbp */

        .byte   0x4                     /* DW_CFA_advance_loc4 */
        .long   .LUW3-.LUW2
        .byte   0xb                     /* DW_CFA_restore_state */

        .align 8
.LEFDE1:
.LSFDE3:
        .long   .LEFDE3-.LASFDE3        /* FDE Length */
.LASFDE3:
        .long   .LASFDE3-.Lframe1       /* FDE CIE offset */
#if HAVE_AS_X86_PCREL
        .long   .LUW5-.                 /* FDE initial location */
#else
        .long   .LUW5@rel
#endif
        .long   .LUW9-.LUW5             /* FDE address range */
        .uleb128 0x0                    /* Augmentation size */

        .byte   0x4                     /* DW_CFA_advance_loc4 */
        .long   .LUW6-.LUW5
        .byte   0xe                     /* DW_CFA_def_cfa_offset */
        .uleb128 208
        .byte   0xa                     /* DW_CFA_remember_state */

        .byte   0x4                     /* DW_CFA_advance_loc4 */
        .long   .LUW7-.LUW6
        .byte   0xe                     /* DW_CFA_def_cfa_offset */
        .uleb128 8

        .byte   0x4                     /* DW_CFA_advance_loc4 */
        .long   .LUW8-.LUW7
        .byte   0xb                     /* DW_CFA_restore_state */

        .align 8
.LEFDE3:

#endif /* __x86_64__ */
#if defined __ELF__ && defined __linux__
        /* Mark the stack as non-executable for GNU/Linux linkers.  */
        .section        .note.GNU-stack,"",@progbits
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.