cbf_packed.c

Go to the documentation of this file.
00001 /**********************************************************************
00002  * cbf_packed -- Packing compression                                  *
00003  *                                                                    *
00004  * Version 0.6 13 January 1999                                        *
00005  *                                                                    *
00006  *            Paul Ellis (ellis@ssrl.slac.stanford.edu) and           *
00007  *         Herbert J. Bernstein (yaya@bernstein-plus-sons.com)        *
00008  **********************************************************************/
00009   
00010 /**********************************************************************
00011  *                               NOTICE                               *
00012  * Creative endeavors depend on the lively exchange of ideas. There   *
00013  * are laws and customs which establish rights and responsibilities   *
00014  * for authors and the users of what authors create.  This notice     *
00015  * is not intended to prevent you from using the software and         *
00016  * documents in this package, but to ensure that there are no         *
00017  * misunderstandings about terms and conditions of such use.          *
00018  *                                                                    *
00019  * Please read the following notice carefully.  If you do not         *
00020  * understand any portion of this notice, please seek appropriate     *
00021  * professional legal advice before making use of the software and    *
00022  * documents included in this software package.  In addition to       *
00023  * whatever other steps you may be obliged to take to respect the     *
00024  * intellectual property rights of the various parties involved, if   *
00025  * you do make use of the software and documents in this package,     *
00026  * please give credit where credit is due by citing this package,     *
00027  * its authors and the URL or other source from which you obtained    *
00028  * it, or equivalent primary references in the literature with the    *
00029  * same authors.                                                      *
00030  *                                                                    *
00031  * Some of the software and documents included within this software   *
00032  * package are the intellectual property of various parties, and      *
00033  * placement in this package does not in any way imply that any       *
00034  * such rights have in any way been waived or diminished.             *
00035  *                                                                    *
00036  * With respect to any software or documents for which a copyright    *
00037  * exists, ALL RIGHTS ARE RESERVED TO THE OWNERS OF SUCH COPYRIGHT.   *
00038  *                                                                    *
00039  * Even though the authors of the various documents and software      *
00040  * found here have made a good faith effort to ensure that the        *
00041  * documents are correct and that the software performs according     *
00042  * to its documentation, and we would greatly appreciate hearing of   *
00043  * any problems you may encounter, the programs and documents any     *
00044  * files created by the programs are provided **AS IS** without any   *
00045  * warranty as to correctness, merchantability or fitness for any     *
00046  * particular or general use.                                         *
00047  *                                                                    *
00048  * THE RESPONSIBILITY FOR ANY ADVERSE CONSEQUENCES FROM THE USE OF    *
00049  * PROGRAMS OR DOCUMENTS OR ANY FILE OR FILES CREATED BY USE OF THE   *
00050  * PROGRAMS OR DOCUMENTS LIES SOLELY WITH THE USERS OF THE PROGRAMS   *
00051  * OR DOCUMENTS OR FILE OR FILES AND NOT WITH AUTHORS OF THE          *
00052  * PROGRAMS OR DOCUMENTS.                                             *
00053  **********************************************************************/
00054  
00055 /**********************************************************************
00056  *                          The IUCr Policy                           *
00057  *                                 on                                 *
00058  *     the Use of the Crystallographic Information File (CIF)         *
00059  *                                                                    *
00060  * The Crystallographic Information File (Hall, Allen & Brown,        *
00061  * 1991) is, as of January 1992, the recommended method for           *
00062  * submitting publications to Acta Crystallographica Section C. The   *
00063  * International Union of Crystallography holds the Copyright on      *
00064  * the CIF, and has applied for Patents on the STAR File syntax       *
00065  * which is the basis for the CIF format.                             *
00066  *                                                                    *
00067  * It is a principal objective of the IUCr to promote the use of      *
00068  * CIF for the exchange and storage of scientific data. The IUCr's    *
00069  * sponsorship of the CIF development was motivated by its            *
00070  * responsibility to its scientific journals, which set the           *
00071  * standards in crystallographic publishing. The IUCr intends that    *
00072  * CIFs will be used increasingly for electronic submission of        *
00073  * manuscripts to these journals in future. The IUCr recognises       *
00074  * that, if the CIF and the STAR File are to be adopted as a means    *
00075  * for universal data exchange, the syntax of these files must be     *
00076  * strictly and uniformly adhered to. Even small deviations from      *
00077  * the syntax would ultimately cause the demise of the universal      *
00078  * file concept. Through its Copyrights and Patents the IUCr has      *
00079  * taken the steps needed to ensure strict conformance with this      *
00080  * syntax.                                                            *
00081  *                                                                    *
00082  * The IUCr policy on the use of the CIF and STAR File processes is   *
00083  * as follows:                                                        *
00084  * _________________________________________________________________  *
00085  *                                                                    *
00086  *  * 1 CIFs and STAR Files may be generated, stored or transmitted,  *
00087  *    without permission or charge, provided their purpose is not     *
00088  *    specifically for profit or commercial gain, and provided that   *
00089  *    the published syntax is strictly adhered to.                    *
00090  *  * 2 Computer software may be developed for use with CIFs or STAR  *
00091  *    files, without permission or charge, provided it is distributed *
00092  *    in the public domain. This condition also applies to software   *
00093  *    for which a charge is made, provided that its primary function  *
00094  *    is for use with files that satisfy condition 1 and that it is   *
00095  *    distributed as a minor component of a larger package of         *
00096  *    software.                                                       *
00097  *  * 3 Permission will be granted for the use of CIFs and STAR Files *
00098  *    for specific commercial purposes (such as databases or network  *
00099  *    exchange processes), and for the distribution of commercial     *
00100  *    CIF/STAR software, on written application to the IUCr Executive *
00101  *    Secretary, 2 Abbey Square, Chester CH1 2HU, England. The        *
00102  *    nature, terms and duration of the licences granted will be      *
00103  *    determined by the IUCr Executive and Finance Committees.        *
00104  *                                                                    *
00105  * _________________________________________________________________  *
00106  *                                                                    *
00107  * In summary, the IUCr wishes to promote the use of the STAR File    *
00108  * concepts as a standard universal data file. It will insist on      *
00109  * strict compliance with the published syntax for all                *
00110  * applications. To assist with this compliance, the IUCr provides    *
00111  * public domain software for checking the logical integrity of a     *
00112  * CIF, and for validating the data name definitions contained        *
00113  * within a CIF. Detailed information on this software, and the       *
00114  * associated dictionaries, may be obtained from the IUCr Office at   *
00115  * 5 Abbey Square, Chester CH1 2HU, England.                          *
00116  **********************************************************************/
00117 
00118 #ifdef __cplusplus
00119 
00120 extern "C" {
00121 
00122 #endif
00123 
00124 #include <stdlib.h>
00125 #include <stdio.h>
00126 #include <string.h>
00127 #include <limits.h>
00128 
00129 #include "cbf.h"
00130 #include "cbf_alloc.h"
00131 #include "cbf_compress.h"
00132 #include "cbf_file.h"
00133 #include "cbf_packed.h"
00134 
00135 #define CBF_SHIFT63 (sizeof (int) * CHAR_BIT > 64 ? 63 : 0)
00136 
00137 typedef struct
00138 {
00139   int offset [128][2];
00140 
00141   unsigned int size [128];
00142 
00143   unsigned int start;
00144 
00145   unsigned int offsets;
00146 }
00147 cbf_packed_data;
00148 
00149 #define CBF_PACKED_BITS1  4
00150 #define CBF_PACKED_BITS2  5
00151 #define CBF_PACKED_BITS3  6
00152 #define CBF_PACKED_BITS4  7
00153 #define CBF_PACKED_BITS5  8
00154 #define CBF_PACKED_BITS6  16
00155 
00156 #define CBF_PACKED_MASK1  ~15
00157 #define CBF_PACKED_MASK2  ~31
00158 #define CBF_PACKED_MASK3  ~63
00159 #define CBF_PACKED_MASK4  ~127
00160 #define CBF_PACKED_MASK5  ~255
00161 #define CBF_PACKED_MASK6  ~65535
00162 
00163 static const unsigned int cbf_packed_bits [8] = { 0, CBF_PACKED_BITS1,
00164                                                      CBF_PACKED_BITS2,
00165                                                      CBF_PACKED_BITS3,
00166                                                      CBF_PACKED_BITS4,
00167                                                      CBF_PACKED_BITS5,
00168                                                      CBF_PACKED_BITS6, 65 };
00169 
00170 
00171   /* Add an integer to the array of offsets to write */
00172 
00173 int cbf_add_offset (cbf_packed_data *data, unsigned int element,
00174                                            unsigned int last_element)
00175 {
00176   int offset;
00177   
00178   unsigned int index, m;
00179 
00180 
00181     /* Save the offset */
00182 
00183   index = (data->offsets + data->start) & 127;
00184 
00185   offset = element - last_element;
00186     
00187   data->offset [index][0] = offset;
00188 
00189 
00190     /* How many bits do we need to save? */
00191 
00192   if (offset == 0)
00193 
00194     data->size [index] = 0;
00195 
00196   else
00197 
00198     if ((element < last_element && offset > 0) ||
00199         (element > last_element && offset < 0))
00200     {
00201       data->offset [index][1] = -(offset > 0);
00202 
00203       data->size [index] = 7;
00204     }
00205     else
00206     {
00207       m = (offset ^ (offset << 1));
00208 
00209       if ((m & CBF_PACKED_MASK1) == 0)
00210 
00211         data->size [index] = 1;
00212 
00213       else
00214 
00215         if ((m & CBF_PACKED_MASK2) == 0)
00216 
00217           data->size [index] = 2;
00218 
00219         else
00220 
00221           if ((m & CBF_PACKED_MASK3) == 0)
00222 
00223             data->size [index] = 3;
00224 
00225           else
00226 
00227             if ((m & CBF_PACKED_MASK4) == 0)
00228 
00229               data->size [index] = 4;
00230 
00231             else
00232 
00233               if ((m & CBF_PACKED_MASK5) == 0)
00234 
00235                 data->size [index] = 5;
00236 
00237              else
00238 
00239                if ((m & CBF_PACKED_MASK6) == 0)
00240 
00241                  data->size [index] = 6;
00242 
00243                else
00244 
00245                  data->size [index] = 7;
00246     }
00247 
00248   
00249     /* Success */
00250 
00251   data->offsets++;
00252 
00253   return 0;
00254 }
00255 
00256 
00257   /* Pack 1 << chunk offsets in [size] bits each */
00258 
00259 int cbf_pack_chunk (cbf_packed_data *data, int size, int chunk, 
00260                                            cbf_file *file,
00261                                            unsigned long *bitcount)
00262 {
00263   unsigned int count, index;
00264 
00265 
00266     /* Write the codes */
00267 
00268   cbf_failnez (cbf_put_integer (file, (size << 3) | chunk, 0, 6))
00269 
00270   chunk = 1 << chunk;
00271 
00272   if (size > 0)
00273   {
00274     index = data->start;
00275     
00276     for (count = chunk; count; count--, index++)
00277 
00278       cbf_failnez (cbf_put_bits (file, data->offset [index & 127], 
00279                                        cbf_packed_bits [size]))
00280   }
00281 
00282 
00283     /* Update the buffer count and start */
00284 
00285   data->start = (data->start + chunk) & 127;
00286 
00287   data->offsets -= chunk;
00288 
00289 
00290     /* Calculate the number of bits written */
00291     
00292   if (bitcount)
00293   
00294     if (size)
00295     
00296       *bitcount = 6 + chunk * cbf_packed_bits [size];
00297       
00298     else
00299     
00300       *bitcount = 6;
00301 
00302 
00303     /* Success */
00304 
00305   return 0;
00306 }
00307 
00308 
00309   /* Get the maximum size required to code 1 << chunk offsets */
00310 
00311 unsigned int cbf_maximum_size (cbf_packed_data *data, unsigned int start,
00312                                                       unsigned int chunk)
00313 {
00314   unsigned int maxsize, index, count;
00315 
00316 
00317     /* Get the maximum size */
00318 
00319   maxsize = 0;
00320 
00321   index = data->start + start;
00322 
00323   for (count = 1 << chunk; count; count--)
00324   {
00325     if (data->size [index & 127] > maxsize)
00326 
00327       maxsize = data->size [index & 127];
00328     
00329     index++;
00330   }
00331   
00332   return maxsize;
00333 }
00334 
00335 
00336   /* Write out a block as economically as possible */
00337 
00338 int cbf_pack_nextchunk (cbf_packed_data *data, cbf_file *file, 
00339                                                unsigned long *bitcount)
00340 {
00341   unsigned int bits, next_bits, chunk, size, next_size,
00342                combined_bits, combined_size;
00343 
00344 
00345     /* Number of bits to encode a single offset */
00346 
00347   size = cbf_maximum_size (data, 0, 0);
00348 
00349   bits = cbf_packed_bits [size] + 6;
00350 
00351 
00352   chunk = 0;
00353 
00354   while (data->offsets >= (2 << chunk))
00355   {
00356     next_size = cbf_maximum_size (data, 1 << chunk, chunk);
00357 
00358     next_bits = (cbf_packed_bits [next_size] << chunk) + 6;
00359 
00360     if (size > next_size)
00361     {
00362       combined_bits = bits * 2 - 6;
00363 
00364       combined_size = size;
00365     }
00366     else
00367     {
00368       combined_bits = next_bits * 2 - 6;
00369 
00370       combined_size = next_size;
00371     }
00372 
00373     if (combined_bits > bits + next_bits)
00374 
00375       return cbf_pack_chunk (data, size, chunk, file, bitcount);
00376 
00377     bits = combined_bits;
00378 
00379     size = combined_size;
00380 
00381     chunk++;
00382   }
00383 
00384   return cbf_pack_chunk (data, size, chunk, file, bitcount);
00385 }
00386 
00387 
00388   /* Compress an array */
00389   
00390 int cbf_compress_packed (void         *source, 
00391                          size_t        elsize, 
00392                          int           elsign, 
00393                          size_t        nelem, 
00394                          unsigned int  compression, 
00395                          cbf_file     *file, 
00396                          size_t       *compressedsize,
00397                          int          *storedbits)
00398 {
00399   unsigned int minelement, maxelement;
00400 
00401   unsigned int count, element, lastelement, unsign, sign, limit;
00402 
00403   unsigned char *unsigned_char_data;
00404   
00405   unsigned long bitcount, chunkbits;
00406 
00407   cbf_packed_data *data;
00408 
00409 
00410     /* Is the element size valid? */
00411     
00412   if (elsize != sizeof (int) &&
00413       elsize != sizeof (short) &&
00414       elsize != sizeof (char))
00415 
00416     return CBF_ARGUMENT;
00417 
00418 
00419     /* Allocate memory */
00420 
00421   cbf_failnez (cbf_alloc ((void **) &data, NULL, sizeof (cbf_packed_data), 1))
00422 
00423   data->start = 0;
00424 
00425   data->offsets = 0;
00426 
00427 
00428     /* Count the expected number of bits */
00429 
00430   minelement = 0;
00431 
00432   maxelement = 0;
00433  
00434  
00435     /* Write the number of elements (64 bits) */
00436 
00437   cbf_onfailnez (cbf_put_integer (file, nelem, 0, 64),
00438                  cbf_free ((void **) data, NULL))
00439 
00440 
00441     /* Write the minimum element (64 bits) */
00442 
00443   cbf_onfailnez (cbf_put_integer (file, minelement, elsign, 64),
00444                  cbf_free ((void **) data, NULL))
00445 
00446 
00447     /* Write the maximum element (64 bits) */
00448 
00449   cbf_onfailnez (cbf_put_integer (file, maxelement, elsign, 64),
00450                  cbf_free ((void **) data, NULL))
00451 
00452 
00453     /* Write the reserved entry (64 bits) */
00454 
00455   cbf_onfailnez (cbf_put_integer (file, 0, 0, 64),
00456                  cbf_free ((void **) data, NULL))
00457 
00458   bitcount = 4 * 64;
00459   
00460 
00461     /* Initialise the pointers */
00462 
00463   unsigned_char_data = (unsigned char *) source;
00464 
00465 
00466     /* Maximum limit (unsigned) is 64 bits */
00467 
00468   if (elsize * CHAR_BIT > 64)
00469   {
00470     sign = 1 << CBF_SHIFT63;
00471 
00472     limit = ~-(sign << 1);
00473 
00474     if (storedbits)
00475     
00476       *storedbits = 64;
00477   }
00478   else
00479   {
00480     sign = 1 << (elsize * CHAR_BIT - 1);
00481 
00482     limit = ~0;
00483 
00484     if (storedbits)
00485     
00486       *storedbits = elsize * CHAR_BIT;
00487   }
00488 
00489 
00490     /* Offset to make the value unsigned */
00491 
00492   if (elsign)
00493 
00494     unsign = sign;
00495 
00496   else
00497 
00498     unsign = 0;
00499 
00500 
00501     /* Start from 0 */
00502 
00503   lastelement = unsign;
00504 
00505   for (count = 0; count < nelem; count++)
00506   {
00507       /* Get the next element */
00508 
00509     if (elsize == sizeof (int))
00510 
00511       element = *((unsigned int *) unsigned_char_data);
00512 
00513     else
00514 
00515       if (elsize == sizeof (short))
00516 
00517         element = *((unsigned short *) unsigned_char_data);
00518 
00519       else
00520 
00521         element = *unsigned_char_data;
00522 
00523     unsigned_char_data += elsize;
00524 
00525 
00526       /* Make the element unsigned */
00527 
00528     element += unsign;
00529 
00530 
00531       /* Limit the value to 64 bits */
00532 
00533     if (element > limit)
00534 
00535       if (elsign && (int) (element - unsign) < 0)
00536 
00537         element = 0;
00538 
00539       else
00540 
00541         element = limit;
00542 
00543 
00544       /* Add the offset to the buffer */
00545 
00546     cbf_add_offset (data, element, lastelement);
00547 
00548 
00549       /* Is the buffer full? */
00550 
00551     if (data->offsets == 128)
00552     {
00553         /* Write the next block as economically as possible */
00554 
00555       cbf_onfailnez (cbf_pack_nextchunk (data, file, &chunkbits),
00556                      cbf_free ((void **) data, NULL))
00557                  
00558       bitcount += chunkbits;
00559     }
00560 
00561 
00562       /* Update the previous element */
00563         
00564     lastelement = element;
00565   }
00566 
00567 
00568     /* Flush the buffers */
00569 
00570   while (data->offsets > 0)
00571   {
00572     cbf_onfailnez (cbf_pack_nextchunk (data, file, &chunkbits),
00573                    cbf_free ((void **) data, NULL))
00574                
00575     bitcount += chunkbits;
00576   }
00577 
00578 
00579     /* Return the number of characters written */
00580     
00581   if (compressedsize)
00582   
00583     *compressedsize = (bitcount + 7) / 8;
00584     
00585 
00586     /* Free memory */
00587 
00588   return cbf_free ((void **) &data, NULL);
00589 }
00590 
00591 
00592   /* Decompress an array */
00593 
00594 int cbf_decompress_packed (void         *destination, 
00595                            size_t        elsize, 
00596                            int           elsign, 
00597                            size_t        nelem, 
00598                            size_t       *nelem_read,
00599                            unsigned int  compression, 
00600                            cbf_file     *file)
00601 {
00602   unsigned int next, pixel, pixelcount;
00603 
00604   unsigned int bits, element, sign, unsign, limit, count64, count;
00605 
00606   unsigned char *unsigned_char_data;
00607 
00608   unsigned int offset [4], last_element [4];
00609 
00610   int errorcode;
00611 
00612 
00613     /* Is the element size valid? */
00614     
00615   if (elsize != sizeof (int) &&
00616       elsize != sizeof (short) &&
00617       elsize != sizeof (char))
00618 
00619     return CBF_ARGUMENT;
00620 
00621 
00622     /* Initialise the pointer */
00623 
00624   unsigned_char_data = (unsigned char *) destination;
00625 
00626 
00627     /* Maximum limit (unsigned) is 64 bits */
00628 
00629   if (elsize * CHAR_BIT > 64)
00630   {
00631     sign = 1 << CBF_SHIFT63;
00632 
00633     limit = ~-(sign << 1);
00634   }
00635   else
00636   {
00637     sign = 1 << (elsize * CHAR_BIT - 1);
00638 
00639     if (elsize == sizeof (int))
00640 
00641       limit = ~0;
00642 
00643     else
00644 
00645       limit = ~-(1 << (elsize * CHAR_BIT));
00646   }
00647 
00648 
00649     /* Offset to make the value unsigned */
00650 
00651   if (elsign)
00652 
00653     unsign = sign;
00654 
00655   else
00656 
00657     unsign = 0;
00658 
00659 
00660     /* How many ints do we need to hold 64 bits? */
00661 
00662   count64 = (64 + sizeof (int) * CHAR_BIT - 1) / (sizeof (int) * CHAR_BIT);
00663 
00664 
00665     /* Initialise the first element */
00666 
00667   last_element [0] = unsign;
00668 
00669   for (count = 1; count < count64; count++)
00670         
00671     last_element [count] = 0;
00672 
00673 
00674     /* Discard the reserved entry (64 bits) */
00675 
00676   cbf_failnez (cbf_get_integer (file, NULL, 0, 64))
00677 
00678 
00679     /* Read the elements */
00680 
00681   count = 0;
00682 
00683   while (count < nelem)
00684   {
00685       /* Get the next 6 bits of data */
00686 
00687     errorcode = cbf_get_integer (file, (int *) &next, 0, 6);
00688 
00689     if (errorcode)
00690     {
00691       if (nelem_read)
00692 
00693         *nelem_read = count + pixel;
00694           
00695       return errorcode;
00696     }
00697 
00698 
00699       /* Decode bits 0-5 */
00700 
00701     pixelcount = 1 << (next & 7);
00702 
00703     bits = cbf_packed_bits [(next >> 3) & 7];
00704 
00705 
00706       /* Read the offsets */
00707 
00708     if (pixelcount + count > nelem)
00709 
00710       pixelcount = nelem - count;
00711 
00712     for (pixel = 0; pixel < pixelcount; pixel++)
00713     {
00714         /* Read an offset */
00715 
00716       if (bits)
00717       {
00718         errorcode = cbf_get_bits (file, (int *) offset, bits);
00719 
00720         if (errorcode)
00721         {
00722           if (nelem_read)
00723 
00724             *nelem_read = count + pixel;
00725           
00726           return errorcode;
00727         }
00728 
00729 
00730           /* Update the current element */
00731 
00732         last_element [0] += offset [0];
00733       }
00734 
00735       element = last_element [0];
00736 
00737 
00738         /* Limit the value to fit the element size */
00739 
00740       if (element > limit)
00741 
00742         if (elsign && (int) (element - unsign) < 0)
00743 
00744           element = 0;
00745 
00746         else
00747 
00748           element = limit;
00749 
00750 
00751         /* Make the element signed? */
00752 
00753       element -= unsign;
00754 
00755 
00756         /* Save the element */
00757 
00758       if (elsize == sizeof (int))
00759 
00760         *((unsigned int *) unsigned_char_data) = element;
00761 
00762       else
00763 
00764         if (elsize == sizeof (short))
00765 
00766           *((unsigned short *) unsigned_char_data) = element;
00767 
00768         else
00769 
00770           *unsigned_char_data = element;
00771 
00772       unsigned_char_data += elsize;
00773     }
00774 
00775     count += pixelcount;
00776   }
00777 
00778 
00779     /* Number read */
00780 
00781   if (nelem_read)
00782   
00783     *nelem_read = count;
00784 
00785 
00786     /* Success */
00787 
00788   return 0;
00789 }
00790 
00791 
00792 #ifdef __cplusplus
00793 
00794 }
00795 
00796 #endif