cbf_canonical.c

Go to the documentation of this file.
00001 /**********************************************************************
00002  * cbf_canonical -- canonical-code compression                        *
00003  *                                                                    *
00004  * Version 0.6 13 January 1999                                        *
00005  *                                                                    *
00006  *            Paul Ellis (ellis@ssrl.slac.stanford.edu) and           *
00007  *         Herbert J. Bernstein (yaya@bernstein-plus-sons.com)        *
00008  **********************************************************************/
00009   
00010 /**********************************************************************
00011  *                               NOTICE                               *
00012  * Creative endeavors depend on the lively exchange of ideas. There   *
00013  * are laws and customs which establish rights and responsibilities   *
00014  * for authors and the users of what authors create.  This notice     *
00015  * is not intended to prevent you from using the software and         *
00016  * documents in this package, but to ensure that there are no         *
00017  * misunderstandings about terms and conditions of such use.          *
00018  *                                                                    *
00019  * Please read the following notice carefully.  If you do not         *
00020  * understand any portion of this notice, please seek appropriate     *
00021  * professional legal advice before making use of the software and    *
00022  * documents included in this software package.  In addition to       *
00023  * whatever other steps you may be obliged to take to respect the     *
00024  * intellectual property rights of the various parties involved, if   *
00025  * you do make use of the software and documents in this package,     *
00026  * please give credit where credit is due by citing this package,     *
00027  * its authors and the URL or other source from which you obtained    *
00028  * it, or equivalent primary references in the literature with the    *
00029  * same authors.                                                      *
00030  *                                                                    *
00031  * Some of the software and documents included within this software   *
00032  * package are the intellectual property of various parties, and      *
00033  * placement in this package does not in any way imply that any       *
00034  * such rights have in any way been waived or diminished.             *
00035  *                                                                    *
00036  * With respect to any software or documents for which a copyright    *
00037  * exists, ALL RIGHTS ARE RESERVED TO THE OWNERS OF SUCH COPYRIGHT.   *
00038  *                                                                    *
00039  * Even though the authors of the various documents and software      *
00040  * found here have made a good faith effort to ensure that the        *
00041  * documents are correct and that the software performs according     *
00042  * to its documentation, and we would greatly appreciate hearing of   *
00043  * any problems you may encounter, the programs and documents any     *
00044  * files created by the programs are provided **AS IS** without any   *
00045  * warranty as to correctness, merchantability or fitness for any     *
00046  * particular or general use.                                         *
00047  *                                                                    *
00048  * THE RESPONSIBILITY FOR ANY ADVERSE CONSEQUENCES FROM THE USE OF    *
00049  * PROGRAMS OR DOCUMENTS OR ANY FILE OR FILES CREATED BY USE OF THE   *
00050  * PROGRAMS OR DOCUMENTS LIES SOLELY WITH THE USERS OF THE PROGRAMS   *
00051  * OR DOCUMENTS OR FILE OR FILES AND NOT WITH AUTHORS OF THE          *
00052  * PROGRAMS OR DOCUMENTS.                                             *
00053  **********************************************************************/
00054  
00055 /**********************************************************************
00056  *                          The IUCr Policy                           *
00057  *                                 on                                 *
00058  *     the Use of the Crystallographic Information File (CIF)         *
00059  *                                                                    *
00060  * The Crystallographic Information File (Hall, Allen & Brown,        *
00061  * 1991) is, as of January 1992, the recommended method for           *
00062  * submitting publications to Acta Crystallographica Section C. The   *
00063  * International Union of Crystallography holds the Copyright on      *
00064  * the CIF, and has applied for Patents on the STAR File syntax       *
00065  * which is the basis for the CIF format.                             *
00066  *                                                                    *
00067  * It is a principal objective of the IUCr to promote the use of      *
00068  * CIF for the exchange and storage of scientific data. The IUCr's    *
00069  * sponsorship of the CIF development was motivated by its            *
00070  * responsibility to its scientific journals, which set the           *
00071  * standards in crystallographic publishing. The IUCr intends that    *
00072  * CIFs will be used increasingly for electronic submission of        *
00073  * manuscripts to these journals in future. The IUCr recognises       *
00074  * that, if the CIF and the STAR File are to be adopted as a means    *
00075  * for universal data exchange, the syntax of these files must be     *
00076  * strictly and uniformly adhered to. Even small deviations from      *
00077  * the syntax would ultimately cause the demise of the universal      *
00078  * file concept. Through its Copyrights and Patents the IUCr has      *
00079  * taken the steps needed to ensure strict conformance with this      *
00080  * syntax.                                                            *
00081  *                                                                    *
00082  * The IUCr policy on the use of the CIF and STAR File processes is   *
00083  * as follows:                                                        *
00084  * _________________________________________________________________  *
00085  *                                                                    *
00086  *  * 1 CIFs and STAR Files may be generated, stored or transmitted,  *
00087  *    without permission or charge, provided their purpose is not     *
00088  *    specifically for profit or commercial gain, and provided that   *
00089  *    the published syntax is strictly adhered to.                    *
00090  *  * 2 Computer software may be developed for use with CIFs or STAR  *
00091  *    files, without permission or charge, provided it is distributed *
00092  *    in the public domain. This condition also applies to software   *
00093  *    for which a charge is made, provided that its primary function  *
00094  *    is for use with files that satisfy condition 1 and that it is   *
00095  *    distributed as a minor component of a larger package of         *
00096  *    software.                                                       *
00097  *  * 3 Permission will be granted for the use of CIFs and STAR Files *
00098  *    for specific commercial purposes (such as databases or network  *
00099  *    exchange processes), and for the distribution of commercial     *
00100  *    CIF/STAR software, on written application to the IUCr Executive *
00101  *    Secretary, 2 Abbey Square, Chester CH1 2HU, England. The        *
00102  *    nature, terms and duration of the licences granted will be      *
00103  *    determined by the IUCr Executive and Finance Committees.        *
00104  *                                                                    *
00105  * _________________________________________________________________  *
00106  *                                                                    *
00107  * In summary, the IUCr wishes to promote the use of the STAR File    *
00108  * concepts as a standard universal data file. It will insist on      *
00109  * strict compliance with the published syntax for all                *
00110  * applications. To assist with this compliance, the IUCr provides    *
00111  * public domain software for checking the logical integrity of a     *
00112  * CIF, and for validating the data name definitions contained        *
00113  * within a CIF. Detailed information on this software, and the       *
00114  * associated dictionaries, may be obtained from the IUCr Office at   *
00115  * 5 Abbey Square, Chester CH1 2HU, England.                          *
00116  **********************************************************************/
00117 
00118 #ifdef __cplusplus
00119 
00120 extern "C" {
00121 
00122 #endif
00123 
00124 #include <stdlib.h>
00125 #include <stdio.h>
00126 #include <string.h>
00127 #include <limits.h>
00128 
00129 #include "cbf.h"
00130 #include "cbf_alloc.h"
00131 #include "cbf_canonical.h"
00132 #include "cbf_compress.h"
00133 #include "cbf_file.h"
00134 
00135 #define CBF_TABLEENTRYBITS  8  /* Bits in a table entry             */
00136 #define CBF_MAXBITS        15  /* Maximum bits in a simple code     */
00137 #define CBF_MAXMAXBITS     65  /* Bits in an coded integer          */
00138 #define CBF_MAXCODEBITS    64  /* Bits in a code                    */
00139 
00140 #define CBF_SHIFT63 (sizeof (int) * CHAR_BIT > 64 ? 63 : 0)
00141 
00142 
00143   /* Compression tree node */
00144 
00145 typedef struct cbf_compress_nodestruct
00146 {
00147   size_t       count;           /* Number in the file                  */
00148   unsigned int code;            /* Code                                */
00149   unsigned int bitcount;        /* Bits in the minimum-redundancy code */
00150   unsigned int bitcode [4];     /* Minimum-redundancy code             */
00151 
00152   struct cbf_compress_nodestruct *next,
00153                                  *child [2];
00154 }
00155 cbf_compress_node;
00156 
00157 
00158   /* Compression data */
00159 
00160 typedef struct
00161 {
00162   cbf_file    *file;             /* File                           */
00163 
00164   unsigned int bits;             /* Coded bits                     */
00165   unsigned int maxbits;          /* Maximum saved bits             */
00166   unsigned int endcode;          /* End-of-data code               */
00167 
00168   size_t       nodes;            /* Number of nodes                */
00169   size_t       nextnode;         /* Number of nodes used           */
00170 
00171   cbf_compress_node *node;       /* Nodes                       */
00172 }
00173 cbf_compress_data;
00174 
00175 
00176   /* Create compression data */
00177 
00178 int cbf_make_compressdata (cbf_compress_data **data, cbf_file *file)
00179 {
00180     /* Does the file exist? */
00181 
00182   if (!file)
00183 
00184     return CBF_ARGUMENT;
00185 
00186   if (!file->stream)
00187 
00188     return CBF_ARGUMENT;
00189 
00190 
00191     /* Allocate memory */
00192 
00193   cbf_failnez (cbf_alloc ((void **) data, NULL, sizeof (cbf_compress_data), 1))
00194 
00195 
00196     /* Initialise */
00197 
00198   (*data)->file     = file;
00199   
00200   (*data)->bits     = 0;
00201   (*data)->maxbits  = 0;
00202   (*data)->endcode  = 0;
00203   (*data)->nodes    = 0;
00204   (*data)->nextnode = 0;
00205  
00206   (*data)->node = NULL;
00207 
00208 
00209     /* Success */
00210 
00211   return 0;
00212 }
00213 
00214 
00215   /* Free data */
00216 
00217 void cbf_free_compressdata (cbf_compress_data *data)
00218 {
00219     /* Free storage */
00220 
00221   if (data)
00222   {
00223     cbf_free ((void **) &data->node, &data->nodes);
00224 
00225     cbf_free ((void **) &data, NULL);
00226   }
00227 }
00228 
00229 
00230   /* Initialise compression data arrays */
00231 
00232 int cbf_initialise_compressdata (cbf_compress_data *data, unsigned int bits, 
00233                                                           unsigned int maxbits)
00234 {
00235   size_t count;
00236   
00237   cbf_compress_node *node;
00238 
00239 
00240     /* Coded bits */
00241 
00242   if (bits > CBF_MAXBITS)
00243 
00244     return CBF_FORMAT;
00245 
00246 
00247     /* Codes must fit int + 1 bit */
00248 
00249   if (maxbits > CBF_MAXMAXBITS)
00250 
00251     return CBF_FORMAT;
00252 
00253   if (maxbits < sizeof (int) * CHAR_BIT + 1)
00254   {
00255     maxbits = sizeof (int) * CHAR_BIT + 1;
00256 
00257     if (maxbits > CBF_MAXMAXBITS)
00258 
00259       maxbits = CBF_MAXMAXBITS;
00260   }
00261 
00262   if (maxbits < bits)
00263 
00264     return CBF_FORMAT;
00265 
00266 
00267     /* Update the values */
00268 
00269   data->bits = bits;
00270 
00271   data->maxbits = maxbits;
00272 
00273 
00274     /* end-of-code code */
00275 
00276   data->endcode = 1 << bits;
00277 
00278 
00279     /* Allocate memory for the nodes */
00280 
00281   count = (data->endcode + maxbits) * 2 + 1;
00282 
00283   cbf_failnez (cbf_realloc ((void **) &data->node, &data->nodes,
00284                                       sizeof (cbf_compress_node), count))
00285 
00286 
00287     /* Initialise the nodes */
00288 
00289   node = data->node;
00290 
00291   for (count = 0; count < data->nodes; count++, node++)
00292   {
00293     node->bitcount  = 0;
00294     node->count     = 0;
00295 
00296     node->next      =
00297     node->child [0] =
00298     node->child [1] = NULL;
00299 
00300     if (count < data->endcode)
00301 
00302       node->code = count - ((count << 1) & data->endcode);
00303 
00304     else
00305 
00306       node->code = count;
00307   }
00308 
00309   data->nextnode = 0;
00310 
00311 
00312     /* Success */
00313 
00314   return 0;
00315 }
00316 
00317 
00318   /* Write a compression table */
00319 
00320 int cbf_put_table (cbf_compress_data *data, unsigned int *bitcount)
00321 {
00322   unsigned int count, codes, endcode, maxbits;
00323 
00324 
00325     /* Coded bits */
00326 
00327   cbf_failnez (cbf_put_integer (data->file, data->bits, 0, CBF_TABLEENTRYBITS))
00328 
00329   *bitcount = CBF_TABLEENTRYBITS;
00330   
00331 
00332     /* How many symbols do we actually use? */
00333 
00334   endcode = 1 << data->bits;
00335 
00336   for (codes = endcode + data->maxbits; data->node [codes].bitcount == 0; 
00337        codes--);
00338 
00339   codes++;
00340     
00341 
00342     /* Maximum bits used */
00343 
00344   if (codes > endcode + data->bits)
00345     
00346     maxbits = codes - endcode - 1;
00347 
00348   else
00349 
00350     maxbits = data->bits;
00351  
00352   cbf_failnez (cbf_put_integer (data->file, maxbits, 0, CBF_TABLEENTRYBITS))
00353 
00354   *bitcount += CBF_TABLEENTRYBITS;
00355 
00356 
00357     /* Minimum-redundancy code lengths */
00358 
00359   for (count = 0; count < codes; count++)
00360   {
00361     if (count == endcode + 1)
00362 
00363       count = endcode + data->bits + 1;
00364 
00365     cbf_failnez (cbf_put_integer (data->file, 
00366                                   data->node [count].bitcount, 0, 
00367                                   CBF_TABLEENTRYBITS))
00368 
00369     *bitcount += CBF_TABLEENTRYBITS;
00370   }
00371 
00372 
00373     /* Success */
00374 
00375   return 0;
00376 }
00377 
00378 
00379   /* Read a compression table */
00380 
00381 int cbf_get_table (cbf_compress_data *data)
00382 {
00383   unsigned int bits, maxbits, endcode, count;
00384 
00385 
00386     /* Coded bits */
00387 
00388   cbf_failnez (cbf_get_integer (data->file, (int *) &bits, 0, 
00389                                 CBF_TABLEENTRYBITS))
00390 
00391 
00392     /* Maximum number of bits */
00393 
00394   cbf_failnez (cbf_get_integer (data->file, (int *) &maxbits, 0, 
00395                                 CBF_TABLEENTRYBITS))
00396 
00397 
00398     /* Initialise the data */
00399 
00400   cbf_failnez (cbf_initialise_compressdata (data, bits, maxbits))
00401 
00402 
00403     /* Reserve nodes */
00404 
00405   endcode = 1 << data->bits;
00406 
00407   data->nextnode = endcode + data->maxbits + 1;
00408 
00409 
00410     /* Read the table */
00411 
00412   for (count = 0; count <= endcode + maxbits; count++)
00413   {
00414     cbf_failnez (cbf_get_integer (data->file, (int *) &bits, 0, 
00415                                   CBF_TABLEENTRYBITS))
00416 
00417     if (count == endcode + 1)
00418 
00419       count = endcode + data->bits + 1;
00420 
00421     data->node [count].bitcount = bits;
00422   }
00423 
00424 
00425     /* Success */
00426     
00427   return 0;
00428 }
00429 
00430 
00431   /* End the bitstream */
00432 
00433 int cbf_put_stopcode (cbf_compress_data *data, unsigned int *bitcount)
00434 {
00435   unsigned int endcode;
00436   
00437   endcode = 1 << data->bits;
00438 
00439   cbf_failnez (cbf_put_bits (data->file, 
00440                              (int *) data->node [endcode].bitcode,
00441                                      data->node [endcode].bitcount))
00442 
00443   *bitcount = data->node [endcode].bitcount;
00444 
00445 
00446     /* Success */
00447 
00448   return 0;
00449 }
00450 
00451 
00452   /* Insert a node into a tree */
00453 
00454 cbf_compress_node *cbf_insert_node (cbf_compress_node *tree, 
00455                                     cbf_compress_node *node)
00456 {
00457   if (tree)
00458   {
00459     if (node->count > tree->count)
00460 
00461       tree->child [1] = cbf_insert_node (tree->child [1], node);
00462 
00463     else
00464 
00465       tree->child [0] = cbf_insert_node (tree->child [0], node);
00466 
00467     return tree;
00468   }
00469   
00470   return node;
00471 }
00472 
00473 
00474   /* Append a node to a list */
00475 
00476 cbf_compress_node *cbf_append_node (cbf_compress_node *list, 
00477                                     cbf_compress_node *node)
00478 {
00479   cbf_compress_node *next;
00480 
00481   if (list)
00482   {
00483     next = list;
00484     
00485     while (next->next)
00486     
00487       next = next->next;
00488       
00489     next->next = node;
00490     
00491     return list;
00492   }
00493   
00494   return node;
00495 }
00496 
00497 
00498   /* Convert an ordered tree into an ordered list */
00499 
00500 cbf_compress_node *cbf_order_node (cbf_compress_node *tree)
00501 {
00502   if (tree)
00503   
00504     return cbf_append_node (cbf_append_node (cbf_order_node (tree->child [0]), 
00505                                      tree),  cbf_order_node (tree->child [1]));
00506 
00507   return NULL;
00508 }
00509 
00510 
00511   /* Create an ordered list */
00512 
00513 cbf_compress_node *cbf_create_list (cbf_compress_data *data) {
00514   
00515   unsigned int count, endcode, codes;
00516 
00517   cbf_compress_node *tree, *list, *node;
00518 
00519 
00520     /* Sort the nodes */
00521 
00522   endcode = 1 << data->bits;
00523 
00524   codes = endcode + data->maxbits + 1;
00525   
00526   node = data->node;
00527 
00528   tree = NULL;
00529 
00530   for (count = 0; count < codes; count++)
00531 
00532     if (node [count].count)
00533     
00534       tree = cbf_insert_node (tree, node + count);
00535 
00536   list = cbf_order_node (tree);
00537 
00538 
00539     /* Dismantle the tree */
00540 
00541   for (count = 0; count < codes; count++)
00542   
00543     node [count].child [0] = 
00544     node [count].child [1] = NULL;
00545 
00546   return list;
00547 }
00548 
00549 
00550   /* Combine the two nodes with minimum count */
00551 
00552 cbf_compress_node *cbf_reduce_list (cbf_compress_data *data, 
00553                                     cbf_compress_node *list)
00554 {
00555   cbf_compress_node *node, *next, *cnext;
00556    
00557 
00558     /* Construct a node */
00559 
00560   node = data->node + data->nextnode;
00561 
00562   data->nextnode++;
00563 
00564 
00565     /* Attach the top nodes */
00566 
00567   node->child [0] = list;
00568   node->child [1] = list->next;
00569   node->count     = list->count + list->next->count;
00570 
00571 
00572     /* Put it at the top */
00573 
00574   next = node->next = list->next->next;
00575 
00576 
00577     /* Order correct?  */
00578 
00579   if (next == NULL)
00580 
00581     return node;
00582     
00583   if (node->count <= next->count)
00584 
00585     return node;
00586 
00587 
00588     /* Otherwise move the node down to the correct position */
00589 
00590   cnext = next;
00591 
00592   while (cnext->next)
00593 
00594     if (node->count < cnext->count || node->count > cnext->next->count)
00595   
00596       cnext = cnext->next;
00597 
00598     else
00599 
00600       break;
00601     
00602   node->next  = cnext->next;
00603   cnext->next = node;
00604 
00605   return next;
00606 }
00607 
00608 
00609   /* Generate the minimum-redundancy code lengths */
00610 
00611 int cbf_generate_codelengths (cbf_compress_node *tree, int bitcount)
00612 {
00613   if (tree)
00614   {
00615     tree->bitcount = bitcount;
00616 
00617     cbf_generate_codelengths (tree->child [0], bitcount + 1);
00618     cbf_generate_codelengths (tree->child [1], bitcount + 1);
00619   }
00620 
00621 
00622     /* Success */
00623 
00624   return 0;
00625 }
00626 
00627 
00628   /* Reverse the order of the bits in the bit-codes */
00629 
00630 int cbf_reverse_bitcodes (cbf_compress_data *data)
00631 {
00632   unsigned int node, endcode, codes, count, index [2][2], bit [2];
00633 
00634   endcode = 1 << data->bits;
00635 
00636   codes = endcode + data->maxbits + 1;
00637 
00638   
00639     /* Reverse the order of the bits in the code */
00640 
00641   for (node = 0; node < codes; node++)
00642 
00643     if (data->node [node].bitcount > 0)
00644 
00645       for (count = 0; count < data->node [node].bitcount - count - 1; count++)
00646       {
00647         bit [0] = count;
00648         bit [1] = data->node [node].bitcount - count - 1;
00649 
00650         index [0][0] = bit [0] % (sizeof (unsigned int) * CHAR_BIT);
00651         index [0][1] = bit [0] / (sizeof (unsigned int) * CHAR_BIT);
00652         index [1][0] = bit [1] % (sizeof (unsigned int) * CHAR_BIT);
00653         index [1][1] = bit [1] / (sizeof (unsigned int) * CHAR_BIT);
00654 
00655         bit [0] = (data->node [node].bitcode [index [0][1]] 
00656                      >> (index [0][0])) & 1;
00657         bit [1] = (data->node [node].bitcode [index [1][1]] 
00658                      >> (index [1][0])) & 1;
00659 
00660         data->node [node].bitcode [index [0][1]] ^= (bit [0] ^ bit [1]) 
00661                      << index [0][0];
00662         data->node [node].bitcode [index [1][1]] ^= (bit [0] ^ bit [1]) 
00663                      << index [1][0];
00664       }
00665  
00666 
00667     /* Success */
00668 
00669   return 0;
00670 }
00671 
00672 
00673   /* Generate the canonical bit-codes */
00674 
00675 int cbf_generate_canonicalcodes (cbf_compress_data *data)
00676 {
00677   unsigned int count [2],
00678                base [CBF_MAXCODEBITS],
00679                node, codes, endcode, bits;
00680 
00681   endcode = 1 << data->bits;
00682 
00683   codes = endcode + data->maxbits + 1;
00684 
00685 
00686     /* Count the number of symbols with the same number of bits */
00687 
00688   memset (base, 0, sizeof (base));
00689 
00690   for (node = 0; node < codes; node++)
00691   {
00692     bits = data->node [node].bitcount;
00693 
00694     if (bits > CBF_MAXCODEBITS)
00695 
00696       return CBF_ARGUMENT;
00697 
00698     if (bits > 0)
00699     {
00700       memset (data->node [node].bitcode, 0, 4 * sizeof (unsigned int));
00701 
00702       data->node [node].bitcode [0] = base [bits - 1];
00703 
00704       base [bits - 1]++;
00705     }
00706   }
00707 
00708 
00709     /* Generate the initial code values */
00710 
00711   count [0] = 0;
00712 
00713   for (bits = CBF_MAXCODEBITS - 1; bits > 0; bits--)
00714   {
00715     count [1] = base [bits - 1];
00716     
00717     base [bits - 1] = (base [bits] + count [0]) / 2;
00718 
00719     count [0] = count [1];
00720   }
00721 
00722 
00723     /* Add the initial code to the count */
00724 
00725   for (node = 0; node < codes; node++)
00726   {
00727     bits = data->node [node].bitcount;
00728 
00729     if (bits > 0)
00730 
00731       data->node [node].bitcode [0] += base [bits - 1];
00732   }
00733 
00734 
00735     /* Reverse the order of the bits in the code */
00736 
00737   return cbf_reverse_bitcodes (data);
00738 }
00739 
00740 
00741   /* Compare the bitcodes of two nodes */
00742 
00743 int cbf_compare_bitcodes (const void *void1, const void *void2)
00744 {
00745   const cbf_compress_node *node1, *node2;
00746 
00747   const unsigned int *code1, *code2;
00748   
00749   unsigned int bit, bits;
00750 
00751   node1 = (const cbf_compress_node *) void1;
00752   node2 = (const cbf_compress_node *) void2;
00753 
00754 
00755     /* Get the codes */
00756   
00757   code1 = node1->bitcode;
00758   code2 = node2->bitcode;
00759 
00760   bits = node1->bitcount;
00761 
00762   if (bits > node2->bitcount)
00763 
00764     bits = node2->bitcount;
00765 
00766 
00767     /* Is either node not used? */
00768 
00769   if (bits == 0)
00770   {
00771     if (node1->bitcount == node2->bitcount)
00772 
00773       return 0;
00774 
00775     return 1 - ((node1->bitcount != 0) << 1);
00776   }
00777 
00778 
00779     /* Compare the codes bit-by-bit */
00780 
00781   for (bit = 0; bits > 0; bit++, bits--)
00782   {
00783     if (bit == sizeof (int) * CHAR_BIT)
00784     {
00785       bit = 0;
00786 
00787       code1++;
00788       code2++;
00789     }
00790 
00791     if (((*code1 ^ *code2) >> bit) & 1)
00792 
00793       return ((*code1 >> bit) & 1) - ((*code2 >> bit) & 1);
00794   }
00795 
00796 
00797     /* Same code */
00798 
00799   return 0;
00800 }
00801 
00802 
00803   /* Construct a tree from an ordered set of nodes */
00804 
00805 int cbf_construct_tree (cbf_compress_data *data, cbf_compress_node **node,
00806                                        int bits, cbf_compress_node **root)
00807 {
00808   cbf_compress_node *nextnode;
00809 
00810   if (node == NULL)
00811   {
00812     nextnode = data->node;
00813     
00814     node = &nextnode;
00815   }
00816 
00817   
00818     /* Create the node */
00819 
00820   *root = data->node + data->nextnode;
00821 
00822   data->nextnode++;
00823 
00824   
00825     /* Make the 0 branch then the 1 branch */
00826 
00827   if ((*node)->bitcount == bits)
00828   {
00829     (*root)->child [0] = *node;
00830 
00831     (*node)++;
00832   }
00833   else
00834 
00835     cbf_failnez (cbf_construct_tree (data, node, bits + 1, 
00836                  &(*root)->child [0]))
00837 
00838   if ((*node)->bitcount == bits)
00839   {
00840     (*root)->child [1] = *node;
00841 
00842     (*node)++;
00843   }
00844   else
00845 
00846     cbf_failnez (cbf_construct_tree (data, node, bits + 1, 
00847                  &(*root)->child [1]))
00848 
00849 
00850     /* Success */
00851 
00852   return 0;
00853 }
00854 
00855 
00856   /* Sort the nodes and set up the decoding arrays */
00857 
00858 int cbf_setup_decode (cbf_compress_data *data, cbf_compress_node **start)
00859 {
00860     /* Generate the codes */
00861 
00862   cbf_failnez (cbf_generate_canonicalcodes (data))
00863 
00864 
00865     /* Sort the nodes in order of the codes */
00866     
00867   qsort (data->node, data->nextnode, sizeof (cbf_compress_node),
00868                                              cbf_compare_bitcodes);
00869 
00870 
00871     /* Construct the tree */
00872 
00873   return cbf_construct_tree (data, NULL, 1, start);
00874 }
00875 
00876 
00877   /* Calculate the expected bit count */
00878 
00879 unsigned long cbf_count_bits (cbf_compress_data *data)
00880 {
00881   unsigned int endcode, codes, code;
00882 
00883   unsigned long bitcount;
00884 
00885   cbf_compress_node *node;
00886 
00887   endcode = 1 << data->bits;
00888 
00889   node = data->node;
00890   
00891   
00892     /* Basic entries */
00893     
00894   bitcount = 4 * 64;
00895 
00896 
00897     /* How many symbols do we actually use? */
00898 
00899   for (codes = endcode + data->maxbits; node [codes].bitcount == 0; codes--);
00900 
00901   codes++;
00902     
00903 
00904     /* Compression table */
00905 
00906   if (codes > endcode + data->bits)
00907     
00908     bitcount += 2 * CBF_TABLEENTRYBITS + 
00909                 (codes - data->bits) * CBF_TABLEENTRYBITS;
00910 
00911   else
00912 
00913     bitcount += 2 * CBF_TABLEENTRYBITS + (endcode + 1) * CBF_TABLEENTRYBITS;
00914 
00915 
00916     /* Compressed data */
00917 
00918   for (code = 0; code < endcode; code++, node++)
00919 
00920     bitcount += node->count * node->bitcount;
00921 
00922   for (; code < codes; code++, node++)
00923 
00924     bitcount += node->count * (node->bitcount + code - endcode);
00925 
00926   return bitcount;
00927 }
00928 
00929 
00930   /* Read a code */
00931 
00932 int cbf_get_code (cbf_compress_data *data, cbf_compress_node *root, 
00933                                                 unsigned int *code, 
00934                                                 unsigned int *bitcount)
00935 {
00936   int bits0, bits1;
00937  
00938     /* Decode the bitstream  */
00939 
00940   bits0 = data->file->bits [0];
00941   bits1 = data->file->bits [1];
00942 
00943   while (*(root->child))
00944   {
00945     if (bits0 == 0)
00946     {
00947       bits1 = getc (data->file->stream);
00948 
00949       if (bits1 == EOF)
00950       {
00951         data->file->bits [0] =
00952         data->file->bits [1] = 0;
00953 
00954         return CBF_FILEREAD;
00955       }
00956 
00957       bits0 = 8;
00958     }
00959 
00960     root = root->child [bits1 & 1];
00961 
00962     bits1 >>= 1;
00963 
00964     bits0--; 
00965   }
00966 
00967   data->file->bits [0] = bits0;
00968   data->file->bits [1] = bits1;
00969 
00970   *code = root->code;
00971 
00972 
00973     /* Simple coding? */
00974 
00975   if ((int) *code < (int) data->endcode)
00976   {
00977     *bitcount = data->bits;
00978 
00979     return 0;
00980   }
00981 
00982 
00983     /* Coded bit count? */
00984 
00985   *code -= data->endcode;
00986 
00987   if (*code)
00988 
00989     if (*code > data->maxbits)
00990 
00991       return CBF_FORMAT;
00992 
00993     else
00994     {
00995       *bitcount = *code;
00996 
00997       return cbf_get_bits (data->file, (int *) code, *code);
00998     }
00999 
01000 
01001     /* End code */
01002 
01003   return CBF_ENDOFDATA;
01004 }
01005 
01006 
01007   /* Write a coded integer */
01008 
01009 int cbf_put_code (cbf_compress_data *data, int code, unsigned int overflow,
01010                                                      unsigned int *bitcount)
01011 {
01012   unsigned int bits, m, endcode;
01013 
01014   int overcode [2], *usecode;
01015 
01016   cbf_compress_node *node;
01017 
01018   endcode = 1 << data->bits;
01019 
01020 
01021     /* Does the number fit in an integer? */
01022 
01023   if (!overflow)
01024   {
01025       /* Code direct? */
01026 
01027     m = (code ^ (code << 1));
01028 
01029     if ((m & -((int) endcode)) == 0)
01030     {
01031         /* Code the number */
01032       
01033       node = data->node + (code & (endcode - 1));
01034 
01035       bits = node->bitcount;
01036 
01037       cbf_put_bits (data->file, (int *) node->bitcode, bits);
01038 
01039       *bitcount = bits;
01040 
01041       return 0;
01042     }
01043 
01044       /* Count the number of bits */
01045       
01046     bits = sizeof (int) * CHAR_BIT;
01047 
01048     while (((m >> (bits - 1)) & 1) == 0)
01049     
01050       bits--;
01051 
01052     usecode = &code;
01053   }
01054   else
01055   {
01056       /* Overflow */
01057 
01058     overcode [0] = code;
01059 
01060     overcode [1] = -(code < 0);
01061 
01062     usecode = overcode;
01063 
01064     bits = sizeof (int) * CHAR_BIT;
01065   }
01066 
01067 
01068     /* Code the number of bits */
01069 
01070   node = data->node + endcode + bits;
01071 
01072   cbf_put_bits (data->file, (int *) node->bitcode, node->bitcount);
01073 
01074 
01075     /* Write the number */
01076 
01077   cbf_put_bits (data->file, usecode, bits);
01078     
01079   *bitcount = bits + node->bitcount;
01080 
01081 
01082     /* Success */
01083 
01084   return 0;
01085 }
01086 
01087 
01088   /* Count the values */
01089 
01090 int cbf_count_values (cbf_compress_data *data,
01091                       void *source, size_t elsize, int elsign, size_t nelem,
01092                       int *minelem, int *maxelem)
01093 {
01094   int code;
01095 
01096   unsigned int count, element, lastelement, minelement, maxelement,
01097            unsign, sign, bitcount, m, endcode, limit;
01098 
01099   unsigned char *unsigned_char_data;
01100 
01101   cbf_compress_node *node;
01102 
01103 
01104     /* Is the element size valid? */
01105     
01106   if (elsize != sizeof (int) &&
01107       elsize != sizeof (short) &&
01108       elsize != sizeof (char))
01109 
01110     return CBF_ARGUMENT;
01111 
01112 
01113     /* Initialise the pointers */
01114 
01115   unsigned_char_data = (unsigned char *) source;
01116 
01117   node = data->node;
01118 
01119 
01120     /* Maximum limit (unsigned) is 64 bits */
01121 
01122   if (elsize * CHAR_BIT > 64)
01123   {
01124     sign = 1 << CBF_SHIFT63;
01125 
01126     limit = ~-(sign << 1);
01127   }
01128   else
01129   {
01130     sign = 1 << (elsize * CHAR_BIT - 1);
01131 
01132     limit = ~0;
01133   }
01134 
01135 
01136     /* Offset to make the value unsigned */
01137 
01138   if (elsign)
01139 
01140     unsign = sign;
01141 
01142   else
01143 
01144     unsign = 0;
01145 
01146 
01147     /* Initialise the minimum and maximum elements */
01148 
01149   minelement = ~0;
01150 
01151   maxelement = 0;
01152 
01153 
01154     /* Start from 0 */
01155 
01156   lastelement = unsign;
01157 
01158   endcode = 1 << data->bits;
01159 
01160   for (count = 0; count < nelem; count++)
01161   {
01162       /* Get the next element */
01163 
01164     if (elsize == sizeof (int))
01165 
01166       element = *((unsigned int *) unsigned_char_data);
01167 
01168     else
01169 
01170       if (elsize == sizeof (short))
01171 
01172         element = *((unsigned short *) unsigned_char_data);
01173 
01174       else
01175 
01176         element = *unsigned_char_data;
01177 
01178     unsigned_char_data += elsize;
01179 
01180 
01181       /* Make the element unsigned */
01182 
01183     element += unsign;
01184 
01185 
01186       /* Limit the value to 64 bits */
01187 
01188     if (element > limit)
01189 
01190       if (elsign && (int) (element - unsign) < 0)
01191 
01192         element = 0;
01193 
01194       else
01195 
01196         element = limit;
01197 
01198 
01199       /* Update the minimum and maximum values */
01200 
01201     if (element < minelement)
01202 
01203       minelement = element;
01204 
01205     if (element > maxelement)
01206 
01207       maxelement = element;
01208 
01209 
01210       /* Calculate the offset to save */
01211 
01212     code = element - lastelement;
01213 
01214 
01215       /* Overflow? */
01216 
01217     if ((element < lastelement) ^ (code < 0))
01218 
01219       node [endcode + sizeof (int) * CHAR_BIT + 1].count++;
01220 
01221     else
01222     {
01223         /* Encode the offset */
01224     
01225       m = (code ^ (code << 1));
01226 
01227       if ((m & -((int) endcode)) == 0)
01228 
01229           /* Simple code */
01230 
01231         node [code & (endcode - 1)].count++;
01232 
01233       else
01234       {
01235           /* Count the number of bits */
01236       
01237         bitcount = sizeof (int) * CHAR_BIT;
01238 
01239         while (((m >> (bitcount - 1)) & 1) == 0)
01240     
01241           bitcount--;
01242 
01243         node [endcode + bitcount].count++;
01244       }
01245     }
01246 
01247 
01248       /* Update the previous element */
01249         
01250     lastelement = element;
01251   }
01252 
01253 
01254     /* Make the minimum and maxium signed? */
01255 
01256   minelement -= unsign;
01257   maxelement -= unsign;
01258 
01259 
01260     /* Save the minimum and maximum */
01261 
01262   if (nelem)
01263   {
01264     *minelem = (int) minelement;
01265     *maxelem = (int) maxelement;
01266   }
01267       
01268 
01269     /* End code */
01270 
01271   node [endcode].count = 1;
01272 
01273   data->nextnode = endcode + data->maxbits + 1;
01274 
01275 
01276     /* Success */
01277 
01278   return 0;
01279 }
01280 
01281 
01282   /* Compress an array */
01283 
01284 int cbf_compress_canonical (void         *source, 
01285                             size_t        elsize, 
01286                             int           elsign, 
01287                             size_t        nelem, 
01288                             unsigned int  compression, 
01289                             cbf_file     *file, 
01290                             size_t       *binsize,
01291                             int          *storedbits)
01292 {
01293   int code, minelement, maxelement;
01294 
01295   unsigned int count, element, lastelement, bits, unsign, sign, limit, endcode;
01296            
01297   unsigned long bitcount, expected_bitcount;
01298 
01299   unsigned char *unsigned_char_data;
01300 
01301   cbf_compress_node *node, *start;
01302 
01303   cbf_compress_data *data;
01304 
01305 
01306     /* Is the element size valid? */
01307     
01308   if (elsize != sizeof (int) &&
01309       elsize != sizeof (short) &&
01310       elsize != sizeof (char))
01311 
01312     return CBF_ARGUMENT;
01313 
01314 
01315     /* Create and initialise the compression data */
01316 
01317   cbf_failnez (cbf_make_compressdata (&data, file))
01318 
01319   cbf_onfailnez (cbf_initialise_compressdata (data, 8, 0), 
01320                  cbf_free_compressdata (data))
01321 
01322 
01323     /* Count the symbols */
01324 
01325   cbf_onfailnez (cbf_count_values (data, source, elsize, elsign, nelem, 
01326                                    &minelement, &maxelement),
01327                                    cbf_free_compressdata (data))
01328 
01329 
01330     /* Generate the code lengths */
01331 
01332   start = cbf_create_list (data);
01333 
01334   while (start->next)
01335 
01336     start = cbf_reduce_list (data, start);
01337 
01338   cbf_generate_codelengths (start, 0);
01339 
01340 
01341     /* Count the expected number of bits */
01342 
01343   expected_bitcount = cbf_count_bits (data);
01344   
01345   
01346     /* Write the number of elements (64 bits) */
01347 
01348   cbf_onfailnez (cbf_put_integer (file, nelem, 0, 64),
01349                  cbf_free_compressdata (data))
01350 
01351 
01352     /* Write the minimum element (64 bits) */
01353 
01354   cbf_onfailnez (cbf_put_integer (file, minelement, elsign, 64),
01355                  cbf_free_compressdata (data))
01356 
01357 
01358     /* Write the maximum element (64 bits) */
01359 
01360   cbf_onfailnez (cbf_put_integer (file, maxelement, elsign, 64),
01361                  cbf_free_compressdata (data))
01362 
01363 
01364     /* Write the reserved entry (64 bits) */
01365 
01366   cbf_onfailnez (cbf_put_integer (file, 0, 0, 64),
01367                  cbf_free_compressdata (data))
01368 
01369   bitcount = 4 * 64;
01370   
01371 
01372     /* Write the table */
01373  
01374   cbf_onfailnez (cbf_put_table (data, &bits), cbf_free_compressdata (data))
01375 
01376   bitcount += bits;
01377   
01378 
01379     /* Generate the canonical bitcodes */
01380 
01381   cbf_onfailnez (cbf_generate_canonicalcodes (data), \
01382                  cbf_free_compressdata (data))
01383   
01384   
01385     /* Initialise the pointers */
01386 
01387   unsigned_char_data = (unsigned char *) source;
01388 
01389   node = data->node;
01390 
01391 
01392     /* Maximum limit (unsigned) is 64 bits */
01393 
01394   if (elsize * CHAR_BIT > 64)
01395   {
01396     sign = 1 << CBF_SHIFT63;
01397 
01398     limit = ~-(sign << 1);
01399     
01400     if (storedbits)
01401     
01402       *storedbits = 64;
01403   }
01404   else
01405   {
01406     sign = 1 << (elsize * CHAR_BIT - 1);
01407 
01408     limit = ~0;
01409 
01410     if (storedbits)
01411     
01412       *storedbits = elsize * CHAR_BIT;
01413   }
01414 
01415 
01416     /* Offset to make the value unsigned */
01417 
01418   if (elsign)
01419 
01420     unsign = sign;
01421 
01422   else
01423 
01424     unsign = 0;
01425 
01426 
01427     /* Start from 0 */
01428 
01429   lastelement = unsign;
01430 
01431   endcode = 1 << data->bits;
01432 
01433   for (count = 0; count < nelem; count++)
01434   {
01435       /* Get the next element */
01436 
01437     if (elsize == sizeof (int))
01438 
01439       element = *((unsigned int *) unsigned_char_data);
01440 
01441     else
01442 
01443       if (elsize == sizeof (short))
01444 
01445         element = *((unsigned short *) unsigned_char_data);
01446 
01447       else
01448 
01449         element = *unsigned_char_data;
01450 
01451     unsigned_char_data += elsize;
01452 
01453 
01454       /* Make the element unsigned */
01455 
01456     element += unsign;
01457 
01458 
01459       /* Limit the value to 64 bits */
01460 
01461     if (element > limit)
01462 
01463       if (elsign && (int) (element - unsign) < 0)
01464 
01465         element = 0;
01466 
01467       else
01468 
01469         element = limit;
01470 
01471 
01472       /* Calculate the offset to save */
01473 
01474     code = element - lastelement;
01475 
01476 
01477       /* Write the (overflowed?) code */
01478 
01479     cbf_onfailnez (cbf_put_code (data, code, 
01480                   (element < lastelement) ^ (code < 0), &bits),
01481                    cbf_free_compressdata (data))
01482 
01483     bitcount += bits;
01484 
01485 
01486       /* Update the previous element */
01487         
01488     lastelement = element;
01489   }
01490 
01491 
01492     /* End code */
01493 
01494   cbf_onfailnez (cbf_put_stopcode (data, &bits), cbf_free_compressdata (data))
01495 
01496   bitcount += bits;
01497 
01498 
01499     /* Free memory */
01500 
01501   cbf_free_compressdata (data);
01502 
01503 
01504     /* Does the actual bit count match the expected bit count? */
01505 
01506   if (bitcount != expected_bitcount)
01507 
01508     return CBF_BITCOUNT;
01509 
01510 
01511     /* Calculate the number of characters written */
01512     
01513   if (binsize)
01514   
01515     *binsize = (bitcount + 7) / 8;
01516 
01517 
01518     /* Success */
01519 
01520   return 0;
01521 }
01522 
01523 
01524   /*****************************************************************/
01525   /* THIS FUNCTION WILL FAIL WITH VALUES OUTSIDE THE INTEGER RANGE */
01526   /*****************************************************************/
01527 
01528   /* Decompress an array (from the start of the table) */
01529 
01530 int cbf_decompress_canonical (void         *destination, 
01531                               size_t        elsize, 
01532                               int           elsign, 
01533                               size_t        nelem, 
01534                               size_t       *nelem_read,
01535                               unsigned int  compression, 
01536                               cbf_file     *file)
01537 {
01538   unsigned int bits, element, sign, unsign, limit, count64, count;
01539 
01540   unsigned char *unsigned_char_data;
01541 
01542   cbf_compress_data *data;
01543 
01544   cbf_compress_node *start;
01545 
01546   unsigned int offset [4], last_element [4];
01547 
01548   int errorcode;
01549 
01550 
01551     /* Is the element size valid? */
01552     
01553   if (elsize != sizeof (int) &&
01554       elsize != sizeof (short) &&
01555       elsize != sizeof (char))
01556 
01557     return CBF_ARGUMENT;
01558 
01559 
01560     /* Discard the reserved entry (64 bits) */
01561 
01562   cbf_failnez (cbf_get_integer (file, NULL, 0, 64))
01563 
01564 
01565     /* Create and initialise the compression data */
01566 
01567   cbf_failnez (cbf_make_compressdata (&data, file))
01568 
01569 
01570     /* Read the compression table */
01571 
01572   cbf_onfailnez (cbf_get_table (data), cbf_free_compressdata (data))
01573 
01574 
01575     /* Set up the decode data */
01576   
01577   cbf_onfailnez (cbf_setup_decode (data, &start), cbf_free_compressdata (data))
01578 
01579 
01580     /* Initialise the pointer */
01581 
01582   unsigned_char_data = (unsigned char *) destination;
01583 
01584 
01585     /* Maximum limit (unsigned) is 64 bits */
01586 
01587   if (elsize * CHAR_BIT > 64)
01588   {
01589     sign = 1 << CBF_SHIFT63;
01590 
01591     limit = ~-(sign << 1);
01592   }
01593   else
01594   {
01595     sign = 1 << (elsize * CHAR_BIT - 1);
01596 
01597     if (elsize == sizeof (int))
01598 
01599       limit = ~0;
01600 
01601     else
01602 
01603       limit = ~-(1 << (elsize * CHAR_BIT));
01604   }
01605 
01606 
01607     /* Offset to make the value unsigned */
01608 
01609   if (elsign)
01610 
01611     unsign = sign;
01612 
01613   else
01614 
01615     unsign = 0;
01616 
01617 
01618     /* How many ints do we need to hold 64 bits? */
01619 
01620   count64 = (64 + sizeof (int) * CHAR_BIT - 1) / (sizeof (int) * CHAR_BIT);
01621 
01622 
01623     /* Initialise the first element */
01624 
01625   last_element [0] = unsign;
01626 
01627   for (count = 1; count < count64; count++)
01628         
01629     last_element [count] = 0;
01630 
01631 
01632     /* Read the elements */
01633 
01634   for (count = 0; count < nelem; count++)
01635   {
01636       /* Read the offset */
01637 
01638     errorcode = cbf_get_code (data, start, offset, &bits);
01639 
01640     if (errorcode)
01641     {
01642       if (nelem_read)
01643       
01644         *nelem_read = count;
01645 
01646       cbf_free_compressdata (data);
01647       
01648       return errorcode;
01649     }
01650 
01651 
01652       /* Update the current element */
01653 
01654     last_element [0] += offset [0];
01655 
01656     element = last_element [0];
01657 
01658 
01659       /* Limit the value to fit the element size */
01660 
01661     if (element > limit)
01662 
01663       if (elsign && (int) (element - unsign) < 0)
01664 
01665         element = 0;
01666 
01667       else
01668 
01669         element = limit;
01670 
01671 
01672       /* Make the element signed? */
01673 
01674     element -= unsign;
01675 
01676 
01677       /* Save the element */
01678 
01679     if (elsize == sizeof (int))
01680 
01681       *((unsigned int *) unsigned_char_data) = element;
01682 
01683     else
01684 
01685       if (elsize == sizeof (short))
01686 
01687         *((unsigned short *) unsigned_char_data) = element;
01688 
01689       else
01690 
01691         *unsigned_char_data = element;
01692 
01693     unsigned_char_data += elsize;
01694   }
01695 
01696 
01697     /* Number read */
01698 
01699   if (nelem_read)
01700   
01701     *nelem_read = count;
01702 
01703 
01704     /* Free memory */
01705 
01706   cbf_free_compressdata (data);
01707 
01708 
01709     /* Success */
01710 
01711   return 0;
01712 }
01713 
01714 
01715 #ifdef __cplusplus
01716 
01717 }
01718 
01719 #endif