-
Notifications
You must be signed in to change notification settings - Fork 4
/
nx_zlib.h
515 lines (435 loc) · 17.7 KB
/
nx_zlib.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
/*
* NX-GZIP compression accelerator user library
* implementing zlib library interfaces
*
* Copyright (C) IBM Corporation, 2011-2017
*
* Licenses for GPLv2 and Apache v2.0:
*
* GPLv2:
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* Apache v2.0:
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Authors: Bulent Abali <[email protected]>
* Xiao Lei Hu <[email protected]>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <assert.h>
#include <errno.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <endian.h>
#include <pthread.h>
#include <sys/platform/ppc.h>
#include "nxu.h"
#include "nx_dbg.h"
#ifndef _NX_ZLIB_H
#define _NX_ZLIB_H
/* General-purpose constants and helper macros used throughout the library. */
#define NX_GZIP_TYPE 9 /* 9 for P9 */
/* NX_MIN/NX_MAX expand each argument more than once; do not pass
 * expressions with side effects (e.g. i++). */
#define NX_MIN(X,Y) (((X)<(Y))?(X):(Y))
#define NX_MAX(X,Y) (((X)>(Y))?(X):(Y))
/* Thin alias for the standard assert(); compiled out when NDEBUG is set. */
#define ASSERT(X) assert(X)
/* Marks a parameter or variable as intentionally unused (GCC attribute);
 * only defined here when the environment has not already provided it. */
#ifndef __unused
# define __unused __attribute__((unused))
#endif
/* Branch-prediction hints built on GCC's __builtin_expect; the !! folds any
 * scalar condition to 0 or 1 first. */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/* debug flags for libnx */
#define NX_VERBOSE_LIBNX_MASK 0x000000ff
#define NX_DEVICES_MAX 256
/* deflate header kinds; the values match the "0 raw, 1 zlib, 2 gzip"
 * encoding documented on nx_stream_s.wrap */
#define HEADER_RAW 0
#define HEADER_ZLIB 1
#define HEADER_GZIP 2
/* Fallbacks for the zlib window-size constants when zlib's own headers
 * have not defined them. */
#ifndef MAX_WBITS
# define MAX_WBITS 15
#endif
#ifndef DEF_WBITS
# define DEF_WBITS MAX_WBITS
#endif
/* Size in bytes of nx_qw_t (type declared outside this header). */
#define NXQWSZ (sizeof(nx_qw_t))
/* Log stream; defined in another translation unit. */
extern FILE *nx_gzip_log;
/*
 * Common config variables for all streams.
 *
 * One shared instance, nx_config, is declared extern right after the
 * structure; its definition is not visible in this header.
 * Field meanings not stated by an existing comment are hedged below.
 */
struct nx_config_t {
long page_sz; /* presumably the system page size in bytes -- TODO confirm */
int line_sz; /* presumably the cache line size in bytes -- TODO confirm */
int stored_block_len;
uint32_t max_byte_count_low;
uint32_t max_byte_count_high;
uint32_t max_byte_count_current;
uint32_t max_source_dde_count;
uint32_t max_target_dde_count;
uint32_t per_job_len; /* less than suspend limit */
uint32_t strm_def_bufsz;
uint32_t strm_inf_bufsz;
uint32_t soft_copy_threshold; /* choose memcpy or hwcopy */
uint32_t compress_threshold; /* collect as much input */
int inflate_fifo_in_len;
int inflate_fifo_out_len;
int deflate_fifo_in_len;
int deflate_fifo_out_len;
int retry_max;
int window_max;
int pgfault_retries;
int verbose;
};
/* Pointer alias for struct nx_config_t. */
typedef struct nx_config_t *nx_configp_t;
/* Shared configuration instance; defined in another translation unit. */
extern struct nx_config_t nx_config;
/* NOTE(review): presumably selects the DHT (dynamic Huffman table) policy;
 * its values are not documented in this header -- confirm at the definition. */
extern int nx_dht_config;
/*
 * NX device handle.
 *
 * Returned (as nx_devp_t) by nx_open() and released with nx_close(), both
 * declared further down in this header.
 */
struct nx_dev_t {
int lock; /* crb serializer */
int nx_errno;
int socket_id; /* one NX-gzip per cpu socket */
int nx_id; /* unique */
int open_cnt;
/* https://github.com/sukadev/linux/blob/vas-kern-v8.1/tools/testing/selftests/powerpc/user-nx842/compress.c#L514 */
/* Anonymous struct (C11): its members are accessed directly on the
 * device handle, e.g. dev->paste_addr, dev->fd. */
struct {
int16_t version;
int16_t id;
int64_t flags;
void *paste_addr;
int fd;
void *vas_handle;
}; /* vas */
};
/* Pointer alias for struct nx_dev_t. */
typedef struct nx_dev_t *nx_devp_t;
/* NOTE(review): NX_DEVICES_MAX is already defined with the same value
 * earlier in this header; this repeat is redundant but harmless. */
#define NX_DEVICES_MAX 256
/* save recent header bytes for hcrc calculations; filled one byte at a
 * time by nx_inflate_get_byte() and indexed by nx_stream_s.ckidx */
typedef struct ckbuf_t { char buf[128]; } ckbuf_t;
/*
 * z_stream equivalent of NX hardware.
 *
 * Holds the library's private per-stream state. zstrm points back to the
 * caller-visible z_stream ("the parent"); next_in/avail_in/total_in and
 * next_out/avail_out/total_out mirror the z_stream fields of the same
 * names and are advanced by the update_stream_in/update_stream_out macros.
 */
typedef struct nx_stream_s {
/* parameters for the supported functions */
int level; /* compression level */
int method; /* must be Z_DEFLATED for zlib */
int windowBits; /* also encodes zlib/gzip/raw */
int memLevel; /* 1...9 (default=8) */
int strategy; /* force compression algorithm */
/* stream data management */
char *next_in; /* next input byte */
uint32_t avail_in; /* # of bytes available at next_in */
unsigned long total_in; /* total number of input bytes read so far */
char *next_out; /* next output byte should be put there */
uint32_t avail_out; /* remaining free space at next_out */
unsigned long total_out; /* total nb of bytes output so far */
/* private area */
uint32_t adler; /* one of adler32 or crc32 */
uint32_t adler32; /* machine generated */
uint32_t crc32; /* checksums of bytes
* compressed then written to
* the stream out. note that
* this interpretation is
* different than zlib.h which
* says checksums are
* immediately updated upon
* reading from the input
* stream. Checksums will reflect
* the true values only after
* the stream is finished or fully
* flushed to the output */
char trailer[9]; /* temp storage for tail bytes */
int trailer_len;
uint64_t total_time; /* stream's total time running */
uint16_t hcrc16; /* stored in the gzip header */
uint32_t cksum; /* running checksum of the header */
ckbuf_t ckbuf; /* hcrc16 helpers */
int ckidx; /* index into ckbuf.buf used by nx_inflate_get_byte() */
int inf_state; /* presumably an inf_state_t value -- TODO confirm */
int inf_held;
int resuming;
int history_len;
int last_comp_ratio;
int is_final;
int invoke_cnt; /* the times to invoke nx inflate or nx deflate */
void *dhthandle; /* presumably the handle returned by dht_begin() -- TODO confirm */
z_streamp zstrm; /* point to the parent */
gz_headerp gzhead; /* where to save gzip header information */
int gzflags; /* FLG */
unsigned int length;
int zlib_cmf;
int zlib_flg;
unsigned int dict_len;
unsigned int dict_alloc_len;
uint32_t dict_id;
char *dict;
int status; /* stream status */
nx_devp_t nxdevp; /* nx hardware device */
int wrap; /* 0 raw, 1 zlib, 2 gzip */
long page_sz;
int need_stored_block;
long last_ratio; /* compression ratio; 500
* means 50% */
char *fifo_in; /* user input collects here */
char *fifo_out; /* user output overflows here */
int32_t len_in; /* fifo_in length */
int32_t used_in; /* fifo_in used bytes */
int32_t cur_in; /* fifo_in starting offset */
int32_t len_out; /* fifo_out counterparts of the three fields above; */
int32_t used_out; /* nx_put_byte() appends at fifo_out + cur_out + used_out */
int32_t cur_out;
/* locate the BFINAL bit (fields currently disabled):
 *   char *last_block_head;      the byte offset
 *   int last_block_head_bit;    the bfinal bit pos
 */
/* partial byte bits counts that couldn't be output */
/* return status */
int nx_cc; /* nx return codes */
uint32_t nx_ce; /* completion extension Fig.6-7 */
int z_rc; /* libz return codes */
uint32_t spbc;
uint32_t tpbc;
uint32_t tebc;
/* nx commands */
/* int final_block; */
int flush;
uint32_t dry_run; /* compress by this amount
* do not update pointers */
/* nx command and parameter block; one command at a time per stream */
nx_gzip_crb_cpb_t *nxcmdp;
nx_gzip_crb_cpb_t nxcmd0;
/* nx_gzip_crb_cpb_t nxcmd1; two cpb blocks to parallelize
lzcount processing */
/* fifo_in is the saved amount from last deflate() call
fifo_out is the overflowed amount from last deflate()
call */
/* base, history, fifo_in first, and last, next_in */
nx_dde_t *ddl_in;
nx_dde_t dde_in[5] __attribute__ ((aligned (128)));
/* base, next_out, fifo_out
 * NOTE(review): three roles are listed for four entries; presumably
 * fifo_out takes a "first" and a "last" entry like fifo_in -- confirm */
nx_dde_t *ddl_out;
nx_dde_t dde_out[4] __attribute__ ((aligned (128)));
} nx_stream;
/* Pointer alias for struct nx_stream_s. */
typedef struct nx_stream_s *nx_streamp;
/*
 * Stream pointers and lengths manipulated.
 *
 * update_stream_out(s, b) / update_stream_in(s, b):
 *     account for b bytes produced/consumed on stream s: advance the
 *     next_* pointer by b, add b to total_* and subtract b from avail_*.
 *     The caller must ensure b <= avail_*; no range check is done here.
 * copy_stream_in(d, s) / copy_stream_out(d, s):
 *     copy the three input (or output) bookkeeping fields from s to d.
 *
 * s and d are pointers to any structure with the zlib-style fields
 * next_in/avail_in/total_in and next_out/avail_out/total_out.
 * Every argument is expanded several times, so pass only expressions
 * without side effects.
 */
#define update_stream_out(s,b) do { \
    (s)->next_out += (b); \
    (s)->total_out += (b); \
    (s)->avail_out -= (b); \
} while (0)
#define update_stream_in(s,b) do { \
    (s)->next_in += (b); \
    (s)->total_in += (b); \
    (s)->avail_in -= (b); \
} while (0)
#define copy_stream_in(d,s) do { \
    (d)->next_in = (s)->next_in; \
    (d)->total_in = (s)->total_in; \
    (d)->avail_in = (s)->avail_in; \
} while (0)
#define copy_stream_out(d,s) do { \
    (d)->next_out = (s)->next_out; \
    (d)->total_out = (s)->total_out; \
    (d)->avail_out = (s)->avail_out; \
} while (0)
/* Fifo buffer management. NX has scatter gather capability.
We treat the fifo queue in two steps: from current head (or tail) to
the fifo end referred to as "first" and from 0 to the current tail (or head)
referred to as "last". To add sz bytes to the fifo
1. test fifo_free_bytes >= sz
2. get fifo_free_first_bytes and fifo_free_last_bytes amounts
3. get fifo_free_first_offset and fifo_free_last_offset addresses
4. append to fifo_free_first_offset; increase 'used'
5. if any data remaining, append to fifo_free_last_offset
To remove sz bytes from the fifo
1. test fifo_used_bytes >= sz
2. get fifo_used_first_bytes and fifo_used_last_bytes
3. get fifo_used_first_offset and fifo_used_last_offset
4. remove from fifo_used_first_offset; increase 'cur' mod 'fifolen', decrease 'used'
5. if more data to go, remove from fifo_used_last_offset
*/
/*
 * Circular fifo arithmetic. In all macros:
 *   cur  - offset of the first used byte in the fifo buffer
 *   used - number of bytes currently held
 *   len  - total capacity of the fifo buffer
 * The used region is contiguous when cur + used <= len; otherwise it wraps
 * and continues at offset 0. "first" is the part that runs up to the end of
 * the buffer, "last" is the part that starts at offset 0.
 * Arguments are expanded more than once; pass side-effect-free expressions.
 */
#define fifo_used_bytes(used) (used)
#define fifo_free_bytes(used, len) ((len)-(used))
/* amount of free bytes in the first and last parts */
#define fifo_free_first_bytes(cur, used, len) \
    ((((cur)+(used)) <= (len)) ? (len)-((cur)+(used)) : 0)
#define fifo_free_last_bytes(cur, used, len) \
    ((((cur)+(used)) <= (len)) ? (cur) : (len)-(used))
/* amount of used bytes in the first and last parts */
#define fifo_used_first_bytes(cur, used, len) \
    ((((cur)+(used)) <= (len)) ? (used) : (len)-(cur))
#define fifo_used_last_bytes(cur, used, len) \
    ((((cur)+(used)) <= (len)) ? 0 : ((used)+(cur))-(len))
/* first and last free parts start here; the first offset is only
 * meaningful while fifo_free_first_bytes() is non-zero */
#define fifo_free_first_offset(cur, used) ((cur)+(used))
#define fifo_free_last_offset(cur, used, len) fifo_used_last_bytes(cur, used, len)
/* first and last used parts start here */
#define fifo_used_first_offset(cur) (cur)
#define fifo_used_last_offset(cur) (0)
/*
 * For appending bytes in to the stream.
 *
 * nx_put_byte(s, b): emit the single byte b on stream s.
 *  - If the user output buffer still has room (s->avail_out > 0) the byte
 *    is stored through s->next_out and the output bookkeeping of s is
 *    advanced by one; the same store and bookkeeping are then repeated on
 *    the parent stream s->zstrm so both stay in step.
 *  - Otherwise the byte is appended to the overflow fifo at
 *    fifo_out + cur_out + used_out and used_out is incremented.
 *    No wrap-around or capacity check is made on that path; the caller
 *    must guarantee cur_out + used_out is inside fifo_out.
 * s and b are expanded more than once; pass side-effect-free expressions.
 */
#define nx_put_byte(s,b) do { \
    if ((s)->avail_out > 0) { \
        *((s)->next_out++) = (b); \
        --(s)->avail_out; \
        ++(s)->total_out; \
        *((s)->zstrm->next_out++) = (b); \
        --(s)->zstrm->avail_out; \
        ++(s)->zstrm->total_out; \
    } else { \
        *((s)->fifo_out + (s)->cur_out + (s)->used_out) = (b); \
        ++(s)->used_out; \
    } \
} while (0)
/*
 * nx_inflate_get_byte is used for header processing. It goes to
 * inf_return when bytes are not sufficient.
 *
 * nx_inflate_get_byte(s, b):
 *  - If s->avail_in is 0, jumps to the label inf_return, which must exist
 *    in the calling function.
 *  - Otherwise reads the byte at s->next_in into b, also saves it in
 *    s->ckbuf.buf[s->ckidx] and advances ckidx, then consumes one input
 *    byte on both s and its parent s->zstrm via update_stream_in().
 *  - When ckbuf becomes full (ckidx == sizeof(ckbuf_t)) the saved bytes are
 *    folded into the running header checksum s->cksum with nx_crc32() and
 *    ckidx restarts at 0.
 * b must be an lvalue; s is expanded several times.
 */
#define nx_inflate_get_byte(s,b) \
do { \
    if ((s)->avail_in == 0) \
        goto inf_return; \
    b = (s)->ckbuf.buf[(s)->ckidx++] = *((s)->next_in); \
    update_stream_in(s, 1); \
    update_stream_in((s)->zstrm, 1); \
    if ((s)->ckidx == sizeof(ckbuf_t)) { \
        (s)->cksum = nx_crc32((s)->cksum, \
                              (const unsigned char *)(s)->ckbuf.buf, \
                              (s)->ckidx); \
        (s)->ckidx = 0; \
    } \
} while (0)
/*
 * print_dbg_info(s, line): dump the input/output bookkeeping of stream s
 * through prt_info() (declared outside this header).
 * The message is tagged with __FUNCTION__ and with `line`, which is printed
 * with %d (callers presumably pass __LINE__). Every counter is cast to long
 * to match the %ld conversions. The printed fields are avail_in, total_in,
 * used_in, cur_in, avail_out, total_out, used_out, cur_out, len_in, len_out.
 */
#define print_dbg_info(s, line) \
do { prt_info(\
"== %s:%d avail_in %ld total_in %ld \
used_in %ld cur_in %ld \
avail_out %ld total_out %ld \
used_out %ld cur_out %ld \
len_in %ld len_out %ld\n", __FUNCTION__, line, \
(long)(s)->avail_in, (long)(s)->total_in, \
(long)(s)->used_in, (long)(s)->cur_in, \
(long)(s)->avail_out, (long)(s)->total_out, \
(long)(s)->used_out, (long)(s)->cur_out, \
(long)(s)->len_in, (long)(s)->len_out); \
} while (0)
/*
 * inflate states.
 *
 * Values are consecutive starting at 0 (inf_state_header).
 * NOTE(review): presumably stored in nx_stream_s.inf_state, which is
 * declared as a plain int -- confirm in nx_inflate.c.
 */
typedef enum {
inf_state_header = 0,
/* gzip header parsing; the names follow the gzip header fields
 * (ID1, ID2, CM, FLG, MTIME, XFL, OS, XLEN, extra, name, comment, HCRC) */
inf_state_gzip_id1,
inf_state_gzip_id2,
inf_state_gzip_cm,
inf_state_gzip_flg,
inf_state_gzip_mtime,
inf_state_gzip_xfl,
inf_state_gzip_os,
inf_state_gzip_xlen,
inf_state_gzip_extra,
inf_state_gzip_name,
inf_state_gzip_comment,
inf_state_gzip_hcrc,
/* zlib header parsing */
inf_state_zlib_id1,
inf_state_zlib_flg,
inf_state_zlib_dict,
inf_state_zlib_dictid,
/* header done; decompressing the deflate payload */
inf_state_inflate,
/* error states */
inf_state_data_error,
inf_state_mem_error,
inf_state_buf_error,
inf_state_stream_error,
} inf_state_t;
#define ZLIB_SIZE_SLOTS 256 /* Each slot represents 4KiB; the last
slot represents everything
larger than or equal to 1024KiB */
/*
 * Usage statistics for the zlib-style entry points.
 *
 * Scalar members are named after one API call each; the
 * [ZLIB_SIZE_SLOTS] arrays are size histograms bucketed as described on
 * ZLIB_SIZE_SLOTS. NOTE(review): counters are presumably bumped through
 * zlib_stats_inc(), which serializes on zlib_stats_mutex -- confirm at the
 * call sites.
 */
struct zlib_stats {
/* deflate side */
unsigned long deflateInit;
unsigned long deflate;
unsigned long deflate_avail_in[ZLIB_SIZE_SLOTS];
unsigned long deflate_avail_out[ZLIB_SIZE_SLOTS];
unsigned long deflateReset;
unsigned long deflate_total_in[ZLIB_SIZE_SLOTS];
unsigned long deflate_total_out[ZLIB_SIZE_SLOTS];
unsigned long deflateSetDictionary;
unsigned long deflateSetHeader;
unsigned long deflateParams;
unsigned long deflateBound;
unsigned long deflatePrime;
unsigned long deflateCopy;
unsigned long deflateEnd;
/* inflate side */
unsigned long inflateInit;
unsigned long inflate;
unsigned long inflate_avail_in[ZLIB_SIZE_SLOTS];
unsigned long inflate_avail_out[ZLIB_SIZE_SLOTS];
unsigned long inflateReset;
unsigned long inflateReset2;
unsigned long inflate_total_in[ZLIB_SIZE_SLOTS];
unsigned long inflate_total_out[ZLIB_SIZE_SLOTS];
unsigned long inflateSetDictionary;
unsigned long inflateGetDictionary;
unsigned long inflateGetHeader;
unsigned long inflateSync;
unsigned long inflatePrime;
unsigned long inflateCopy;
unsigned long inflateEnd;
/* presumably accumulated byte counts and accumulated times (units not
 * stated in this header) -- TODO confirm */
uint64_t deflate_len;
uint64_t deflate_time;
uint64_t inflate_len;
uint64_t inflate_time;
};
/* Both objects are defined in another translation unit. */
extern pthread_mutex_t zlib_stats_mutex;
extern struct zlib_stats zlib_stats;
/*
 * Add one to a statistics counter.
 *
 * count: address of the counter to bump (must not be NULL).
 *
 * Does nothing unless nx_gzip_gather_statistics() (declared outside this
 * header) reports that statistics are enabled. The read-modify-write is
 * done while holding zlib_stats_mutex so concurrent callers do not lose
 * updates.
 *
 * NOTE(review): this is a plain `inline` definition in a header; under
 * C99/C11 inline rules one translation unit must also provide the external
 * definition -- confirm that one exists.
 */
inline void zlib_stats_inc(unsigned long *count)
{
    if (!nx_gzip_gather_statistics())
        return;

    pthread_mutex_lock(&zlib_stats_mutex);
    *count += 1;
    pthread_mutex_unlock(&zlib_stats_mutex);
}
/*
 * Sample the current time stamp.
 *
 * Returns the value of __ppc_get_timebase(), i.e. a tick count of the
 * PowerPC Time Base register; use get_nxtime_diff() and nxtime_to_us() to
 * turn two samples into an elapsed time.
 */
static inline uint64_t get_nxtime_now(void)
{
    const uint64_t timebase_ticks = __ppc_get_timebase();

    return timebase_ticks;
}
/*
 * Elapsed ticks between two time stamps.
 *
 * t1: earlier sample, t2: later sample (both from get_nxtime_now()).
 * Returns t2 - t1 computed modulo 2^64. Unsigned subtraction wraps, so the
 * result is also correct when the counter rolled over once between the two
 * samples, and it is 0 when both samples are equal.
 */
static inline uint64_t get_nxtime_diff(uint64_t t1, uint64_t t2)
{
    return t2 - t1;
}
#ifndef __KERNEL__
/*
 * Convert a time base tick count to microseconds.
 *
 * nxtime: number of ticks, e.g. a value from get_nxtime_diff().
 * Returns the whole number of microseconds (fraction truncated) as a
 * double, using the frequency reported by __ppc_get_timebase_freq().
 * Returns 0.0 if that frequency is reported as 0, rather than dividing by
 * zero.
 *
 * The ticks are split into whole seconds and a remainder before scaling so
 * that nxtime * 1000000 cannot overflow 64 bits for large tick counts; the
 * result equals floor(nxtime * 1000000 / freq) whenever that product fits.
 */
static inline double nxtime_to_us(uint64_t nxtime)
{
    const uint64_t freq = __ppc_get_timebase_freq();
    uint64_t whole_sec;
    uint64_t rem_ticks;

    if (freq == 0)
        return 0.0;

    whole_sec = nxtime / freq;
    rem_ticks = nxtime % freq;

    return (double)whole_sec * 1000000.0
           + (double)(rem_ticks * 1000000 / freq);
}
#endif
/* Number of elements in a true array. Only valid when `a` is an actual
 * array object: applied to a pointer (including an array-typed function
 * parameter) it silently yields sizeof(pointer) / sizeof(element). */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof((a)) / sizeof((a)[0]))
#endif
/*
 * Prototypes below are grouped by the source file named in each section
 * comment; none of the definitions are visible in this header.
 */
/* gzip_vas.c */
extern void *nx_fault_storage_address;
extern void *nx_function_begin(int function, int pri);
extern int nx_function_end(void *vas_handle);
/* zlib crc32.c and adler32.c */
extern unsigned long nx_crc32_combine(unsigned long crc1, unsigned long crc2, uint64_t len2);
extern unsigned long nx_adler32_combine(unsigned long adler1, unsigned long adler2, uint64_t len2);
/* nx_crc32() is the routine nx_inflate_get_byte() uses to fold saved gzip
 * header bytes into nx_stream_s.cksum */
extern unsigned long nx_crc32(unsigned long crc, const unsigned char *buf, uint64_t len);
/* nx_zlib.c */
extern nx_devp_t nx_open(int nx_id);
extern int nx_close(nx_devp_t nxdevp);
extern int nx_touch_pages(void *buf, long buf_len, long page_len, int wr);
extern void *nx_alloc_buffer(uint32_t len, long alignment, int lock);
extern void nx_free_buffer(void *buf, uint32_t len, int unlock);
extern int nx_submit_job(nx_dde_t *src, nx_dde_t *dst, nx_gzip_crb_cpb_t *cmdp, void *handle);
extern int nx_append_dde(nx_dde_t *ddl, void *addr, uint32_t len);
extern int nx_touch_pages_dde(nx_dde_t *ddep, long buf_sz, long page_sz, int wr);
extern int nx_copy(char *dst, char *src, uint64_t len, uint32_t *crc, uint32_t *adler, nx_devp_t nxdevp);
extern void nx_hw_init(void);
extern void nx_hw_done(void);
/* nx_deflate.c */
extern int nx_deflateInit_(z_streamp strm, int level, const char *version, int stream_size);
/* memLevel, version and stream_size are marked __unused in this prototype */
extern int nx_deflateInit2_(z_streamp strm, int level, int method, int windowBits,
int memLevel __unused, int strategy, const char *version __unused, int stream_size __unused);
/* Convenience wrapper: forwards to nx_deflateInit_() with ZLIB_VERSION and
 * sizeof(z_stream) filled in */
#define nx_deflateInit(strm, level) nx_deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
extern int nx_deflate(z_streamp strm, int flush);
extern int nx_deflateEnd(z_streamp strm);
extern unsigned long nx_deflateBound(z_streamp strm, unsigned long sourceLen);
/* nx_inflate.c */
extern int nx_inflateInit_(z_streamp strm, const char *version, int stream_size);
extern int nx_inflateInit2_(z_streamp strm, int windowBits, const char *version, int stream_size);
/* Convenience wrapper: forwards to nx_inflateInit_() with ZLIB_VERSION and
 * sizeof(z_stream) filled in */
#define nx_inflateInit(strm) nx_inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
extern int nx_inflate(z_streamp strm, int flush);
extern int nx_inflateEnd(z_streamp strm);
/* nx_compress.c */
extern int nx_compress2(Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen, int level);
extern int nx_compress(Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen);
extern uLong nx_compressBound(uLong sourceLen);
/* nx_uncompr.c */
extern int nx_uncompress2(Bytef *dest, uLongf *destLen, const Bytef *source, uLong *sourceLen);
extern int nx_uncompress(Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen);
/* nx_dht.c */
extern void *dht_begin(char *ifile, char *ofile);
extern void dht_end(void *handle);
extern int dht_lookup(nx_gzip_crb_cpb_t *cmdp, int request, void *handle);
#endif /* _NX_ZLIB_H */