cbf_compress.c

Go to the documentation of this file.
00001 /**********************************************************************
00002  * cbf_compress -- compression and decompression                      *
00003  *                                                                    *
00004  * Version 0.6 13 January 1999                                        *
00005  *                                                                    *
00006  *            Paul Ellis (ellis@ssrl.slac.stanford.edu) and           *
00007  *         Herbert J. Bernstein (yaya@bernstein-plus-sons.com)        *
00008  **********************************************************************/
00009   
00010 /**********************************************************************
00011  *                               NOTICE                               *
00012  * Creative endeavors depend on the lively exchange of ideas. There   *
00013  * are laws and customs which establish rights and responsibilities   *
00014  * for authors and the users of what authors create.  This notice     *
00015  * is not intended to prevent you from using the software and         *
00016  * documents in this package, but to ensure that there are no         *
00017  * misunderstandings about terms and conditions of such use.          *
00018  *                                                                    *
00019  * Please read the following notice carefully.  If you do not         *
00020  * understand any portion of this notice, please seek appropriate     *
00021  * professional legal advice before making use of the software and    *
00022  * documents included in this software package.  In addition to       *
00023  * whatever other steps you may be obliged to take to respect the     *
00024  * intellectual property rights of the various parties involved, if   *
00025  * you do make use of the software and documents in this package,     *
00026  * please give credit where credit is due by citing this package,     *
00027  * its authors and the URL or other source from which you obtained    *
00028  * it, or equivalent primary references in the literature with the    *
00029  * same authors.                                                      *
00030  *                                                                    *
00031  * Some of the software and documents included within this software   *
00032  * package are the intellectual property of various parties, and      *
00033  * placement in this package does not in any way imply that any       *
00034  * such rights have in any way been waived or diminished.             *
00035  *                                                                    *
00036  * With respect to any software or documents for which a copyright    *
00037  * exists, ALL RIGHTS ARE RESERVED TO THE OWNERS OF SUCH COPYRIGHT.   *
00038  *                                                                    *
00039  * Even though the authors of the various documents and software      *
00040  * found here have made a good faith effort to ensure that the        *
00041  * documents are correct and that the software performs according     *
00042  * to its documentation, and we would greatly appreciate hearing of   *
00043  * any problems you may encounter, the programs and documents and any *
00044  * files created by the programs are provided **AS IS** without any   *
00045  * warranty as to correctness, merchantability or fitness for any     *
00046  * particular or general use.                                         *
00047  *                                                                    *
00048  * THE RESPONSIBILITY FOR ANY ADVERSE CONSEQUENCES FROM THE USE OF    *
00049  * PROGRAMS OR DOCUMENTS OR ANY FILE OR FILES CREATED BY USE OF THE   *
00050  * PROGRAMS OR DOCUMENTS LIES SOLELY WITH THE USERS OF THE PROGRAMS   *
00051  * OR DOCUMENTS OR FILE OR FILES AND NOT WITH AUTHORS OF THE          *
00052  * PROGRAMS OR DOCUMENTS.                                             *
00053  **********************************************************************/
00054  
00055 /**********************************************************************
00056  *                          The IUCr Policy                           *
00057  *                                 on                                 *
00058  *     the Use of the Crystallographic Information File (CIF)         *
00059  *                                                                    *
00060  * The Crystallographic Information File (Hall, Allen & Brown,        *
00061  * 1991) is, as of January 1992, the recommended method for           *
00062  * submitting publications to Acta Crystallographica Section C. The   *
00063  * International Union of Crystallography holds the Copyright on      *
00064  * the CIF, and has applied for Patents on the STAR File syntax       *
00065  * which is the basis for the CIF format.                             *
00066  *                                                                    *
00067  * It is a principal objective of the IUCr to promote the use of      *
00068  * CIF for the exchange and storage of scientific data. The IUCr's    *
00069  * sponsorship of the CIF development was motivated by its            *
00070  * responsibility to its scientific journals, which set the           *
00071  * standards in crystallographic publishing. The IUCr intends that    *
00072  * CIFs will be used increasingly for electronic submission of        *
00073  * manuscripts to these journals in future. The IUCr recognises       *
00074  * that, if the CIF and the STAR File are to be adopted as a means    *
00075  * for universal data exchange, the syntax of these files must be     *
00076  * strictly and uniformly adhered to. Even small deviations from      *
00077  * the syntax would ultimately cause the demise of the universal      *
00078  * file concept. Through its Copyrights and Patents the IUCr has      *
00079  * taken the steps needed to ensure strict conformance with this      *
00080  * syntax.                                                            *
00081  *                                                                    *
00082  * The IUCr policy on the use of the CIF and STAR File processes is   *
00083  * as follows:                                                        *
00084  * _________________________________________________________________  *
00085  *                                                                    *
00086  *  * 1 CIFs and STAR Files may be generated, stored or transmitted,  *
00087  *    without permission or charge, provided their purpose is not     *
00088  *    specifically for profit or commercial gain, and provided that   *
00089  *    the published syntax is strictly adhered to.                    *
00090  *  * 2 Computer software may be developed for use with CIFs or STAR  *
00091  *    files, without permission or charge, provided it is distributed *
00092  *    in the public domain. This condition also applies to software   *
00093  *    for which a charge is made, provided that its primary function  *
00094  *    is for use with files that satisfy condition 1 and that it is   *
00095  *    distributed as a minor component of a larger package of         *
00096  *    software.                                                       *
00097  *  * 3 Permission will be granted for the use of CIFs and STAR Files *
00098  *    for specific commercial purposes (such as databases or network  *
00099  *    exchange processes), and for the distribution of commercial     *
00100  *    CIF/STAR software, on written application to the IUCr Executive *
00101  *    Secretary, 2 Abbey Square, Chester CH1 2HU, England. The        *
00102  *    nature, terms and duration of the licences granted will be      *
00103  *    determined by the IUCr Executive and Finance Committees.        *
00104  *                                                                    *
00105  * _________________________________________________________________  *
00106  *                                                                    *
00107  * In summary, the IUCr wishes to promote the use of the STAR File    *
00108  * concepts as a standard universal data file. It will insist on      *
00109  * strict compliance with the published syntax for all                *
00110  * applications. To assist with this compliance, the IUCr provides    *
00111  * public domain software for checking the logical integrity of a     *
00112  * CIF, and for validating the data name definitions contained        *
00113  * within a CIF. Detailed information on this software, and the       *
00114  * associated dictionaries, may be obtained from the IUCr Office at   *
00115  * 5 Abbey Square, Chester CH1 2HU, England.                          *
00116  **********************************************************************/
00117 
00118 #ifdef __cplusplus
00119 
00120 extern "C" {
00121 
00122 #endif
00123 
00124 #include <stdlib.h>
00125 #include <stdio.h>
00126 #include <string.h>
00127 #include <limits.h>
00128 
00129 #include "cbf.h"
00130 #include "cbf_alloc.h"
00131 #include "cbf_file.h"
00132 #include "cbf_compress.h"
00133 #include "cbf_canonical.h"
00134 #include "cbf_packed.h"
00135 #include "cbf_byte_offset.h"
00136 #include "cbf_predictor.h"
00137 #include "cbf_uncompressed.h"
00138 
00139 
00140   /* Compress an array */
00141 
00142 int cbf_compress (void         *source, 
00143                   size_t        elsize, 
00144                   int           elsign, 
00145                   size_t        nelem,
00146                   unsigned int  compression, 
00147                   cbf_file     *file, 
00148                   size_t       *compressedsize,
00149                   int          *bits, 
00150                   char         *digest)
00151 {
00152   int errorcode;
00153   
00154   size_t size;
00155 
00156 
00157     /* Discard any bits in the buffers */
00158     
00159   cbf_failnez (cbf_reset_bits (file))
00160   
00161   if (compressedsize)
00162   
00163     *compressedsize = 0;
00164 
00165 
00166     /* Start a digest? */
00167     
00168   if (digest)
00169   
00170     cbf_failnez (cbf_start_digest (file))
00171 
00172 
00173   errorcode = 0;
00174   
00175   size = 0;
00176 
00177   switch (compression)
00178   {
00179     case CBF_CANONICAL:
00180     
00181       errorcode = cbf_compress_canonical (source, elsize, elsign, nelem,
00182                                           compression, file, 
00183                                           &size, bits);
00184       break;
00185 
00186     case CBF_PACKED:
00187     case 0:
00188 
00189       errorcode = cbf_compress_packed (source, elsize, elsign, nelem,
00190                                        compression, file,
00191                                        &size, bits);
00192       break;
00193 
00194     case CBF_BYTE_OFFSET:
00195     
00196       errorcode = cbf_compress_byte_offset (source, elsize, elsign, nelem,
00197                                             compression, file, 
00198                                             &size, bits);
00199       break;
00200 
00201     case CBF_PREDICTOR:
00202     
00203       errorcode = cbf_compress_predictor (source, elsize, elsign, nelem,
00204                                           compression, file, 
00205                                           &size, bits);
00206       break;
00207 
00208     case CBF_NONE:
00209     
00210       errorcode = cbf_compress_none (source, elsize, elsign, nelem,
00211                                      compression, file, 
00212                                      &size, bits);
00213       break;
00214 
00215   default:
00216   
00217       errorcode = CBF_ARGUMENT;
00218   }
00219   
00220   
00221     /* Add the compressed size */
00222     
00223   if (compressedsize)
00224   
00225     *compressedsize += size;
00226 
00227 
00228     /* Flush the buffers */
00229 
00230   errorcode |= cbf_flush_bits (file);
00231   
00232 
00233     /* Get the digest? */
00234     
00235   if (digest)
00236   
00237      errorcode |= cbf_end_digest (file, digest);
00238 
00239 
00240     /* Done */
00241 
00242   return errorcode;
00243 }
00244 
00245 
00246   /* Get the parameters of an array (read up to the start of the table) */
00247   
00248 int cbf_decompress_parameters (int          *eltype, 
00249                                size_t       *elsize, 
00250                                int          *elsigned, 
00251                                int          *elunsigned,
00252                                size_t       *nelem, 
00253                                int          *minelem, 
00254                                int          *maxelem,
00255                                unsigned int  compression,
00256                                cbf_file     *file)
00257 {
00258   unsigned int compression_file, nelem_file;
00259 
00260   int errorcode, minelement_file, maxelement_file, 
00261                    elsigned_file, elunsigned_file;
00262 
00263 
00264     /* Discard any bits in the buffers */
00265 
00266   cbf_failnez (cbf_reset_bits (file));
00267   
00268    /* Check compression type */
00269 
00270   if (compression != CBF_CANONICAL   &&
00271       compression != CBF_PACKED      &&
00272       compression != CBF_BYTE_OFFSET &&
00273       compression != CBF_PREDICTOR   &&
00274       compression != CBF_NONE)
00275 
00276     return CBF_FORMAT;
00277 
00278   if (compression == CBF_NONE)
00279   {
00280     nelem_file = 0;
00281 
00282     minelement_file = maxelement_file = 0;
00283   } 
00284   else 
00285   { 
00286       /* Read the number of elements (64 bits) */
00287 
00288     cbf_failnez (cbf_get_integer (file, (int *) &nelem_file, 0, 64))
00289 
00290 
00291       /* Read the minimum element (64 bits) */
00292 
00293     errorcode = cbf_get_integer (file, &minelement_file, 1, 64);
00294 
00295     if (errorcode && errorcode != CBF_OVERFLOW)
00296 
00297       return errorcode;
00298 
00299 
00300       /* Read the maximum element (64 bits) */
00301 
00302     errorcode = cbf_get_integer (file, &maxelement_file, 1, 64);
00303 
00304     if (errorcode && errorcode != CBF_OVERFLOW)
00305 
00306       return errorcode;
00307   }
00308 
00309 
00310     /* Update the element sign, type, minimum, maximum and number */
00311 
00312   elsigned_file = !(((unsigned) minelement_file) <= 
00313                     ((unsigned) maxelement_file) &&
00314                     ((signed)   minelement_file) >
00315                     ((signed)   maxelement_file));
00316 
00317   elunsigned_file = !(((signed)   minelement_file) <= 
00318                       ((signed)   maxelement_file) &&
00319                       ((unsigned) minelement_file) >  
00320                       ((unsigned) maxelement_file));
00321 
00322   if (elsigned)
00323   
00324     *elsigned = elsigned_file;
00325 
00326   if (elunsigned)
00327   
00328     *elunsigned = elunsigned_file;
00329 
00330   if (eltype)
00331   
00332     *eltype = CBF_INTEGER;
00333 
00334   if (elsize)
00335 
00336       /* Calculate the minimum number of bytes needed to hold the elements */
00337       
00338     if (minelement_file == 0 && maxelement_file == 0)
00339       
00340       *elsize = 0;
00341       
00342     else
00343 
00344       if ((!elsigned_file ||
00345           ((signed) minelement_file == (signed short) minelement_file &&
00346            (signed) maxelement_file == (signed short) maxelement_file)) ||
00347           (!elunsigned_file ||
00348           ((unsigned) minelement_file == (unsigned short) minelement_file &&
00349            (unsigned) maxelement_file == (unsigned short) maxelement_file)))
00350            
00351         if ((!elsigned_file ||
00352             ((signed) minelement_file == (signed char) minelement_file &&
00353              (signed) maxelement_file == (signed char) maxelement_file)) ||
00354              (!elunsigned_file ||
00355             ((unsigned) minelement_file == (unsigned char) minelement_file &&
00356              (unsigned) maxelement_file == (unsigned char) maxelement_file)))
00357              
00358           *elsize = sizeof (char);
00359           
00360         else
00361 
00362           *elsize = sizeof (short);
00363           
00364       else
00365         
00366         *elsize = sizeof (int);
00367 
00368   if (minelem)
00369   
00370     *minelem = minelement_file;
00371 
00372   if (maxelem)
00373   
00374     *maxelem = maxelement_file;
00375 
00376   if (nelem)
00377   
00378     *nelem = nelem_file;
00379 
00380 
00381     /* Success */
00382 
00383   return 0;
00384 }
00385 
00386 
00387   /* Decompress an array (from the start of the table) */
00388 
00389 int cbf_decompress (void         *destination, 
00390                     size_t        elsize, 
00391                     int           elsign, 
00392                     size_t        nelem, 
00393                     size_t       *nelem_read,
00394                     unsigned int  compression,
00395                     int           bits, 
00396                     int           sign,
00397                     cbf_file     *file)
00398 {
00399   switch (compression)
00400   {
00401     case CBF_CANONICAL:
00402     
00403       return cbf_decompress_canonical (destination, elsize, elsign, nelem,
00404                                        nelem_read, compression, 
00405                                        file);
00406 
00407     case CBF_PACKED:
00408     case 0:
00409 
00410       return cbf_decompress_packed (destination, elsize, elsign, nelem,
00411                                     nelem_read, compression, 
00412                                     file);
00413 
00414     case CBF_BYTE_OFFSET:
00415     
00416       return cbf_decompress_byte_offset (destination, elsize, elsign, nelem,
00417                                          nelem_read, compression, 
00418                                          file);
00419 
00420     case CBF_PREDICTOR:
00421     
00422       return cbf_decompress_predictor (destination, elsize, elsign, nelem,
00423                                        nelem_read, compression,
00424                                        file);
00425 
00426     case CBF_NONE:
00427     
00428       return cbf_decompress_none (destination, elsize, elsign, nelem, 
00429                                   nelem_read, compression, 
00430                                   bits, sign, file);
00431   }
00432 
00433 
00434     /* Fail */
00435 
00436   return CBF_ARGUMENT;
00437 }
00438 
00439 
00440 #ifdef __cplusplus
00441 
00442 }
00443 
00444 #endif