/*
  GNU polyxmass - the massist's program.
  -------------------------------------- 
  Copyright (C) 2000,2001,2002 Filippo Rusconi

  http://www.polyxmass.org

  This file is part of the "GNU polyxmass" project.
   
  The "GNU polyxmass" project is an official GNU project package (see
  www.gnu.org) released ---in its entirety--- under the GNU General
  Public License and was started at the Centre National de la
  Recherche Scientifique (FRANCE), that granted me the formal
  authorization to publish it under this Free Software License.

  This software is free software; you can redistribute it and/or
  modify it under the terms of the GNU  General Public
  License as published by the Free Software Foundation; either
  version 2 of the License, or (at your option) any later version.
   
  This software is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.
   
  You should have received a copy of the GNU  General Public
  License along with this software; if not, write to the
  Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
  Boston, MA 02110-1301, USA.
*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "pxmchem-polymer.h"
#include "pxmchem-monomer.h"
#include "pxmchem-oligomer.h"
#include "libpolyxmass-plugin.h"
#include "pxmchem-polymer-plugins.h"


#define DATE_MAX_CHARS 200

/*
  NEW'ING FUNCTIONS, DUPLICATING FUNCTIONS, INITING FUNCTIONS ...
*/
PxmPolymer *
pxmchem_polymer_new (void)
{
  PxmPolymer *polymer = g_malloc0 (sizeof (PxmPolymer));
  
  polymer->plminfo = pxmchem_plminfo_new ();

  /*
    'masspair_seq' and 'masspair_sel' members are not allocated
    because they are not necessarily required.
  */
 
  polymer->monomerGPA = g_ptr_array_new ();
  
  polymer->propGPA = g_ptr_array_new ();
  
  return polymer;
}


PxmPlminfo *
pxmchem_plminfo_new (void)
{
  PxmPlminfo * plminfo = g_malloc0 (sizeof (PxmPlminfo));

  return plminfo;
}


/*
  Store a private copy of 'version' in 'polymer', releasing whatever
  version string was stored before. Both parameters must be non-NULL
  (asserted). Always returns TRUE.
*/
gboolean
pxmchem_polymer_set_version (PxmPolymer *polymer, gchar *version)
{
  gchar *copy = NULL;

  g_assert (polymer != NULL && version != NULL);

  /*
    Duplicate before freeing: the caller is allowed to pass the
    string that is currently stored in the polymer, and freeing first
    would make g_strdup () read released memory. g_free () accepts
    NULL, so no guard is needed.
  */
  copy = g_strdup (version);
  g_free (polymer->version);
  polymer->version = copy;

  return TRUE;
}

gboolean
pxmchem_polymer_set_modified (PxmPolymer *polymer, gboolean modified)
{
  /*
    Overwrite the 'modified' flag of the polymer (which must be
    non-NULL) with the value passed as parameter. Always returns TRUE.
  */
  g_assert (NULL != polymer);

  polymer->modified = modified;

  return TRUE;
}


gboolean
pxmchem_polymer_get_modified (PxmPolymer *polymer)
{
  gboolean current = FALSE;

  /* The polymer must be non-NULL; its flag is returned as is. */
  g_assert (NULL != polymer);

  current = polymer->modified;

  return current;
}


gboolean
pxmchem_polymer_invert_modified (PxmPolymer *polymer)
{
  gboolean flipped = FALSE;

  g_assert (NULL != polymer);

  /*
    Toggle the 'modified' flag and report the value it has after the
    toggling.
  */
  flipped = !polymer->modified;
  polymer->modified = flipped;

  return polymer->modified;
}


/*
  Store a private copy of 'type' in 'plminfo', releasing the
  previously stored type string, if any. Both parameters must be
  non-NULL (asserted). Always returns TRUE.
*/
gboolean
pxmchem_polymer_plminfo_set_type (PxmPlminfo *plminfo, gchar *type)
{
  gchar *copy = NULL;

  g_assert (plminfo != NULL && type != NULL);

  /*
    Duplicate first so that passing plminfo->type itself is safe;
    g_free () accepts NULL.
  */
  copy = g_strdup (type);
  g_free (plminfo->type);
  plminfo->type = copy;

  return TRUE;
}

/*
  Store a private copy of 'name' in 'plminfo', releasing the
  previously stored name string, if any. Both parameters must be
  non-NULL (asserted). Always returns TRUE.
*/
gboolean
pxmchem_polymer_plminfo_set_name (PxmPlminfo *plminfo, gchar *name)
{
  gchar *copy = NULL;

  g_assert (plminfo != NULL && name != NULL);

  /*
    Duplicate first so that passing plminfo->name itself is safe;
    g_free () accepts NULL.
  */
  copy = g_strdup (name);
  g_free (plminfo->name);
  plminfo->name = copy;

  return TRUE;
}

/*
  Store a private copy of 'code' in 'plminfo', releasing the
  previously stored code string, if any. Both parameters must be
  non-NULL (asserted). Always returns TRUE.
*/
gboolean
pxmchem_polymer_plminfo_set_code (PxmPlminfo *plminfo, gchar *code)
{
  gchar *copy = NULL;

  g_assert (plminfo != NULL && code != NULL);

  /*
    Duplicate first so that passing plminfo->code itself is safe;
    g_free () accepts NULL.
  */
  copy = g_strdup (code);
  g_free (plminfo->code);
  plminfo->code = copy;

  return TRUE;
}

/*
  Store a private copy of 'author' in 'plminfo', releasing the
  previously stored author string, if any. Both parameters must be
  non-NULL (asserted). Always returns TRUE.
*/
gboolean
pxmchem_polymer_plminfo_set_author (PxmPlminfo *plminfo, gchar *author)
{
  gchar *copy = NULL;

  g_assert (plminfo != NULL && author != NULL);

  /*
    Duplicate first so that passing plminfo->author itself is safe;
    g_free () accepts NULL.
  */
  copy = g_strdup (author);
  g_free (plminfo->author);
  plminfo->author = copy;

  return TRUE;
}

/*
  Store a private copy of 'year' in the 'date_year' member of
  'plminfo', releasing the previously stored string, if any. Both
  parameters must be non-NULL (asserted). Always returns TRUE.
*/
gboolean
pxmchem_polymer_plminfo_set_date_year (PxmPlminfo *plminfo, gchar *year)
{
  gchar *copy = NULL;

  g_assert (plminfo != NULL && year != NULL);

  /*
    Duplicate first so that passing plminfo->date_year itself is
    safe; g_free () accepts NULL.
  */
  copy = g_strdup (year);
  g_free (plminfo->date_year);
  plminfo->date_year = copy;

  return TRUE;
}

/*
  Store a private copy of 'month' in the 'date_month' member of
  'plminfo', releasing the previously stored string, if any. Both
  parameters must be non-NULL (asserted). Always returns TRUE.
*/
gboolean
pxmchem_polymer_plminfo_set_date_month (PxmPlminfo *plminfo, gchar *month)
{
  gchar *copy = NULL;

  g_assert (plminfo != NULL && month != NULL);

  /*
    Duplicate first so that passing plminfo->date_month itself is
    safe; g_free () accepts NULL.
  */
  copy = g_strdup (month);
  g_free (plminfo->date_month);
  plminfo->date_month = copy;

  return TRUE;
}

/*
  Store a private copy of 'day' in the 'date_day' member of
  'plminfo', releasing the previously stored string, if any. Both
  parameters must be non-NULL (asserted). Always returns TRUE.
*/
gboolean
pxmchem_polymer_plminfo_set_date_day (PxmPlminfo *plminfo, gchar *day)
{
  gchar *copy = NULL;

  g_assert (plminfo != NULL && day != NULL);

  /*
    Duplicate first so that passing plminfo->date_day itself is
    safe; g_free () accepts NULL.
  */
  copy = g_strdup (day);
  g_free (plminfo->date_day);
  plminfo->date_day = copy;

  return TRUE;
}

/*
  Store a private copy of 'file' in 'plminfo', releasing the
  previously stored file string, if any. Both parameters must be
  non-NULL (asserted). Always returns TRUE.
*/
gboolean
pxmchem_polymer_plminfo_set_file (PxmPlminfo *plminfo, gchar *file)
{
  gchar *copy = NULL;

  g_assert (plminfo != NULL && file != NULL);

  /*
    Duplicate first so that passing plminfo->file itself is safe;
    g_free () accepts NULL.
  */
  copy = g_strdup (file);
  g_free (plminfo->file);
  plminfo->file = copy;

  return TRUE;
}




/*
  INTEGRITY CHECKING FUNCTIONS
*/


/*
  LOCATING FUNCTIONS
*/
gchar *
pxmchem_polymer_get_left_end_modif_name (PxmPolymer *polymer)
{
  PxmProp *modif_prop = NULL;

  g_assert (NULL != polymer);

  /*
    A left end modification is stored in the polymer's array of prop
    objects as a two-strings PxmProp: its name is "LEFT_END_MODIF" and
    its data is the name of the modification ("Acetylation", for
    example).
  */
  modif_prop = libpolyxmass_prop_find_prop (polymer->propGPA,
                                            NULL,
                                            NULL,
                                            "LEFT_END_MODIF",
                                            NULL,
                                            PXM_CMP_NO_DEEP);

  /*
    The returned string is the prop's own data, not a copy; NULL is
    returned if no such prop was found.
  */
  return (modif_prop != NULL) ? (gchar *) modif_prop->data : NULL;
}


gchar *
pxmchem_polymer_get_right_end_modif_name (PxmPolymer *polymer)
{
  PxmProp *modif_prop = NULL;

  g_assert (NULL != polymer);

  /*
    A right end modification is stored in the polymer's array of prop
    objects as a two-strings PxmProp: its name is "RIGHT_END_MODIF"
    and its data is the name of the modification ("Acetylation", for
    example).
  */
  modif_prop = libpolyxmass_prop_find_prop (polymer->propGPA,
                                            NULL,
                                            NULL,
                                            "RIGHT_END_MODIF",
                                            NULL,
                                            PXM_CMP_NO_DEEP);

  /*
    The returned string is the prop's own data, not a copy; NULL is
    returned if no such prop was found.
  */
  return (modif_prop != NULL) ? (gchar *) modif_prop->data : NULL;
}




/*
  Find all the occurrences of a sequence motif in the sequence of
  'polymer' and append one newly allocated PxmOligomer per occurrence
  to 'fillGPA'.

  The motif is given either as an array of monomers ('motifGPA') or
  as a string ('motif'). If 'motifGPA' is non-NULL and non-empty, it
  is the one that is used. Otherwise 'motif' must be a non-empty
  string, which is converted into a temporary array of monomers by
  calling pxmchem_monomer_fill_array_from_string () with 'codelen'
  and 'refGPA'; that temporary array is freed before returning.

  Each monomer of the motif is compared with the monomer at the
  corresponding position of the polymer sequence by calling
  pxmchem_monomer_cmp (motif_monomer, sequence_monomer, how_cmp).
  'how_cmp' thus tells if the comparison is deep and, for example
  with PXM_CMP_1_SUBSET_OF_2, if the prop objects of the motif
  monomer only have to be a subset of the ones of the sequence
  monomer (think of searching a word in a document and accepting
  matches only if the word is underlined or in bold face).

  The search window advances one monomer at a time, so overlapping
  occurrences are all reported.

  'propGPA' is optional (may be NULL or empty). When it holds prop
  objects, each one of them must be found at least once in any of the
  monomers of an oligomer for that oligomer to be kept: the
  oligomers that fail this test are removed from 'fillGPA' and
  freed. Only the oligomers appended by this very call are subjected
  to that test; oligomers that the caller had already stored in
  'fillGPA' are left untouched.

  'polymer', 'refGPA' and 'fillGPA' must be non-NULL (asserted).

  Returns the number of oligomers that this call left in 'fillGPA',
  or -1 if no usable motif was passed or if the motif string could
  not be converted into monomers.
*/
gint
pxmchem_polymer_find_sequence_motif (PxmPolymer *polymer,
                                     gchar *motif,
                                     GPtrArray *motifGPA,
                                     PxmHowCmp how_cmp,
                                     gint codelen,
                                     GPtrArray *refGPA,
                                     GPtrArray *propGPA,
                                     GPtrArray *fillGPA)
{
  gint count = 0;
  gint iter = 0;
  gint jter = 0;
  gint end = 0;
  gint motif_len = 0;
  gint first_new = 0;

  gboolean was_string = FALSE;
  gboolean found = FALSE;

  PxmMonomer *monomer_seq = NULL;
  PxmMonomer *monomer_motif = NULL;
  PxmOligomer *oligomer = NULL;
  PxmProp *prop = NULL;
  PxmProp *prop_found = NULL;


  g_assert (polymer != NULL);
  g_assert (refGPA != NULL);
  g_assert (fillGPA != NULL);

  /*
    Try to establish what the motif is, either a string or an array
    of monomers.
  */
  if (motifGPA == NULL || motifGPA->len <= 0)
    {
      if (motif == NULL || strlen (motif) <= 0)
        {
          g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
                 _("%s@%d: both 'motif' and 'motifGPA' cannot be "
                 "NULL or empty\n"),
                 __FILE__, __LINE__);

          return -1;
        }

      /*
        At this point, we know that the string 'motif' contains the
        motif to be searched, and that string has to be converted into
        an array of ordered monomers. We own that array and must free
        it on every exit path below.
      */
      motifGPA = g_ptr_array_new ();

      was_string = TRUE;

      count = pxmchem_monomer_fill_array_from_string (motifGPA,
                                                      motif, codelen,
                                                      refGPA,
                                                      TRUE);
      if (count == -1)
        {
          g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
                 _("%s@%d: failed to fill array of monomers with the "
                 " sequence motif string\n"),
                 __FILE__, __LINE__);

          /* Do not leak the temporary array of monomers. */
          pxmchem_monomer_GPA_free (motifGPA);

          return -1;
        }

      /*
        Since the motif is not zero-length, and since there were no
        errors during the filling of the array, count has to be
        superior to 0.
      */
      g_assert (count > 0);
    }

  /*
    At this point we have an array of monomers corresponding to the
    motif to be searched for in the polymer sequence.
  */
  count = 0;
  motif_len = (gint) motifGPA->len;

  /*
    Remember where the oligomers of this call start in fillGPA, so
    that the prop-based filtering below never touches (nor frees)
    oligomers that the caller had stored there beforehand.
  */
  first_new = (gint) fillGPA->len;

  /*
    An optimization here: with the sequence EEEAAAAAEEE (11 monomers)
    and the motif EEE (3 monomers), index 8 is the last one at which
    a match may still start, because farther on there are not enough
    monomers left in the sequence. 'end' is the first index at which
    it is useless to try. The computation is made with signed
    integers so that a motif longer than the sequence yields a
    value <= 0 and thus no iteration at all.
  */
  end = (gint) polymer->monomerGPA->len - motif_len + 1;

  for (iter = 0; iter < end; iter++)
    {
      found = FALSE;

      for (jter = 0; jter < motif_len; jter++)
        {
          monomer_seq = g_ptr_array_index (polymer->monomerGPA, iter + jter);
          g_assert (monomer_seq != NULL);

          monomer_motif = g_ptr_array_index (motifGPA, jter);
          g_assert (monomer_motif != NULL);

          if (0 == pxmchem_monomer_cmp (monomer_motif, monomer_seq,
                                        how_cmp))
            {
              /*
                The compared monomers are the same: go on with the
                next position, both in the motif and in the sequence.
              */
              found = TRUE;
            }
          else
            {
              /*
                The monomers differ: give up this window so that the
                outer loop can go further by one index value.
              */
              found = FALSE;

              break;
            }
        }

      /*
        We are here either because the whole motif was iterated with
        success, or because the break statement was encountered. The
        'found' variable differentiates these two cases.
      */
      if (found == TRUE)
        {
          oligomer = pxmchem_oligomer_new ();

          oligomer->polymer = polymer;
          oligomer->start_idx = iter;
          oligomer->end_idx = iter + motif_len - 1;

          g_ptr_array_add (fillGPA, oligomer);

          count++;
        }
    }

  /*
    We still have something to check: the propGPA array of prop
    instances. Each one of these prop objects has to be found AT LEAST
    ONCE IN ANY OF THE MONOMERS of each oligomer that was found above.
    If it is not the case, the oligomer is considered NOT TO MATCH
    the "find request": it is removed from fillGPA and freed.
  */
  if (propGPA != NULL && propGPA->len > 0)
    {
      for (iter = 0; iter < (gint) propGPA->len; iter++)
        {
          prop = g_ptr_array_index (propGPA, iter);
          g_assert (prop != NULL);

          /*
            Iterate in the reverse order, because we might well have
            to remove oligomers from the array while iterating in
            it. Stop at first_new: what is located before that index
            was not put there by us.
          */
          for (jter = (gint) fillGPA->len - 1; jter >= first_new; jter--)
            {
              oligomer = g_ptr_array_index (fillGPA, jter);
              g_assert (oligomer != NULL);

              /* Note that the comparison here is absolute. */
              prop_found =
                pxmchem_oligomer_find_prop_in_monomers
                (oligomer, prop,
                 PXM_CMP_1_SUBSET_OF_2 | PXM_CMP_2_SUBSET_OF_1, NULL);

              if (prop_found == NULL)
                {
                  /*
                    None of the monomers of this oligomer contains
                    the required prop object: the oligomer is
                    invalid.
                  */
                  oligomer = g_ptr_array_remove_index (fillGPA, jter);
                  g_assert (oligomer != NULL);

                  pxmchem_oligomer_free (oligomer);

                  /* Keep the returned count in sync with fillGPA. */
                  count--;
                }
            }
        }
    }

  if (TRUE == was_string)
    pxmchem_monomer_GPA_free (motifGPA);

  return count;
}


/*
  Iterate in the whole sequence of 'polymer' and, for each monomer in
  which pxmchem_monomer_find_prop () finds a prop object matching
  'prop' according to 'how_cmp', append to 'fillGPA' a newly
  allocated single-monomer oligomer (start_idx == end_idx == index
  of the monomer).

  'polymer', 'prop' and 'fillGPA' must be non-NULL (asserted).

  Returns the number of oligomers appended to 'fillGPA' (0 if no
  monomer matches or if the sequence is empty).
*/
gint
pxmchem_polymer_find_prop_in_sequence (PxmPolymer *polymer,
                                       PxmHowCmp how_cmp,
                                       PxmProp *prop,
                                       GPtrArray *fillGPA)
{
  gint count = 0;
  gint iter = 0;
  gint len = 0;

  PxmMonomer *monomer = NULL;
  PxmOligomer *oligomer = NULL;
  PxmProp *prop_found = NULL;


  g_assert (polymer != NULL);
  g_assert (prop != NULL);
  g_assert (fillGPA != NULL);

  /*
    The loop bound must be the actual length of the sequence,
    otherwise no monomer would ever be checked.
  */
  len = (gint) polymer->monomerGPA->len;

  for (iter = 0; iter < len; iter++)
    {
      monomer = g_ptr_array_index (polymer->monomerGPA, iter);
      g_assert (monomer != NULL);

      prop_found = pxmchem_monomer_find_prop (monomer, prop, how_cmp, NULL);

      if (prop_found == NULL)
        continue;

      /*
        This monomer matches: describe it as a one-monomer-long
        oligomer. We do not stop at the first match, since all the
        matching monomers are to be reported and counted.
      */
      oligomer = pxmchem_oligomer_new ();

      oligomer->polymer = polymer;
      oligomer->start_idx = iter;
      oligomer->end_idx = iter;

      g_ptr_array_add (fillGPA, oligomer);

      count++;
    }

  return count;
}


/*
  MODIFYING FUNCTIONS
*/
/*
  Returns a PxmEnd bit mask (not a count) telling which end(s) of the
  polymer were effectively modified: see the PxmEnd enum in file
  libpolyxmass-globals.h for the meaning of the LEFT, RIGHT... flags.
*/
PxmEnd
pxmchem_polymer_modify (PxmPolymer *polymer, gchar *modif, PxmEnd end)
{
  /*
    In file libpolyxmass-globals.h :
    enum _PxmEnd
    {
    PXM_END_NONE = 1 << 0,
    PXM_END_LEFT = 1 << 1,
    PXM_END_RIGHT = 1 << 2,
    PXM_END_BOTH = (PXM_END_LEFT | PXM_END_RIGHT)
    };

    The value that is returned starts as PXM_END_NONE and gets the
    flag of each end that was successfully modified OR'ed into it.
  */
  PxmEnd modified_ends = PXM_END_NONE;

  g_assert (NULL != polymer);
  g_assert (NULL != modif);

  /* Nothing to do if no end is requested. */
  if (PXM_END_NONE == end)
    return PXM_END_NONE;

  /* Left end, if asked for. */
  if ((end & PXM_END_LEFT)
      && TRUE == pxmchem_polymer_LE_modify (polymer, modif))
    modified_ends |= PXM_END_LEFT;

  /* Right end, if asked for. */
  if ((end & PXM_END_RIGHT)
      && TRUE == pxmchem_polymer_RE_modify (polymer, modif))
    modified_ends |= PXM_END_RIGHT;

  return modified_ends;
}

/*
  Set the left end modification of 'polymer' to 'modif'.

  A polymer cannot bear two different modifications at the same time
  on the same end, so any "LEFT_END_MODIF" prop object already
  present in the polymer's array of prop objects is removed and freed
  first. Then a new two-strings prop object ("LEFT_END_MODIF",
  'modif') is appended to that array.

  The caller must have made sure that 'modif' is known to the polymer
  definition context: no such check is performed here.

  Both parameters must be non-NULL (asserted). Returns MODIF_SUCCESS.
*/
gboolean
pxmchem_polymer_LE_modify (PxmPolymer *polymer, gchar *modif)
{
  PxmProp *prop = NULL;
  gboolean removed = FALSE;


  g_assert (polymer != NULL);
  g_assert (modif != NULL);

  /*
    Check if polymer is already bearing a modification. If so remove it
    right away.
  */
  prop = libpolyxmass_prop_find_prop (polymer->propGPA,
                                      NULL,
                                      NULL,
                                      "LEFT_END_MODIF",
                                      NULL,
                                      PXM_CMP_NO_DEEP);
  if (NULL != prop)
    {
      /*
        The removal must be performed outside of g_assert (): when
        assertions are compiled out (G_DISABLE_ASSERT) the asserted
        expression is not evaluated, and the prop freed below would
        remain in the array as a dangling pointer.
      */
      removed = g_ptr_array_remove (polymer->propGPA, prop);
      g_assert (TRUE == removed);

      libpolyxmass_prop_free (prop);
    }

  prop = libpolyxmass_prop_both_strings_new ("LEFT_END_MODIF", modif);

  g_ptr_array_add (polymer->propGPA, prop);

  return MODIF_SUCCESS;
}

  
/*
  Set the right end modification of 'polymer' to 'modif'.

  A polymer cannot bear two different modifications at the same time
  on the same end, so any "RIGHT_END_MODIF" prop object already
  present in the polymer's array of prop objects is removed and freed
  first. Then a new two-strings prop object ("RIGHT_END_MODIF",
  'modif') is appended to that array.

  The caller must have made sure that 'modif' is known to the polymer
  definition context: no such check is performed here.

  Both parameters must be non-NULL (asserted). Returns MODIF_SUCCESS.
*/
gboolean
pxmchem_polymer_RE_modify (PxmPolymer *polymer, gchar *modif)
{
  PxmProp *prop = NULL;
  gboolean removed = FALSE;


  g_assert (polymer != NULL);
  g_assert (modif != NULL);

  /*
    Check if polymer is already bearing a modification. If so remove it
    right away.
  */
  prop = libpolyxmass_prop_find_prop (polymer->propGPA,
                                      NULL,
                                      NULL,
                                      "RIGHT_END_MODIF",
                                      NULL,
                                      PXM_CMP_NO_DEEP);
  if (NULL != prop)
    {
      /*
        The removal must be performed outside of g_assert (): when
        assertions are compiled out (G_DISABLE_ASSERT) the asserted
        expression is not evaluated, and the prop freed below would
        remain in the array as a dangling pointer.
      */
      removed = g_ptr_array_remove (polymer->propGPA, prop);
      g_assert (TRUE == removed);

      libpolyxmass_prop_free (prop);
    }

  prop = libpolyxmass_prop_both_strings_new ("RIGHT_END_MODIF", modif);

  g_ptr_array_add (polymer->propGPA, prop);

  return MODIF_SUCCESS;
}

  
/*
  Returns a PxmEnd bit mask (not a count) telling which end(s) of the
  polymer were effectively un-modified: see the PxmEnd enum in file
  libpolyxmass-globals.h for the meaning of the LEFT, RIGHT... flags.
*/
PxmEnd
pxmchem_polymer_un_modify (PxmPolymer *polymer, gchar *modif, PxmEnd end)
{
  /*
    In file libpolyxmass-globals.h :
    enum _PxmEnd
    {
    PXM_END_NONE = 1 << 0,
    PXM_END_LEFT = 1 << 1,
    PXM_END_RIGHT = 1 << 2,
    PXM_END_BOTH = (PXM_END_LEFT | PXM_END_RIGHT)
    };

    The value that is returned starts as 0 and gets the flag of each
    end for which the un-modification function returned TRUE OR'ed
    into it.
  */
  PxmEnd unmodified_ends = 0;

  g_assert (NULL != polymer);
  g_assert (NULL != modif);

  /* Nothing to do if no end is requested. */
  if (PXM_END_NONE == end)
    return PXM_END_NONE;

  /* Left end, if asked for. */
  if ((end & PXM_END_LEFT)
      && TRUE == pxmchem_polymer_LE_un_modify (polymer, modif))
    unmodified_ends |= PXM_END_LEFT;

  /* Right end, if asked for. */
  if ((end & PXM_END_RIGHT)
      && TRUE == pxmchem_polymer_RE_un_modify (polymer, modif))
    unmodified_ends |= PXM_END_RIGHT;

  return unmodified_ends;
}


/*
  Returns
  UN_MODIF_SUCCESS or
  UN_MODIF_NO_MATCH or
  UN_MODIF_NO_MODIF or
  UN_MODIF_FAILURE.
*/
/*
  Remove the left end modification of 'polymer', that is its
  "LEFT_END_MODIF" prop object.

  If 'modif' is non-NULL, the modification is removed only if its
  name (the data of the prop object) is identical to 'modif'. If
  'modif' is NULL, whatever left end modification is found is
  removed.

  'polymer' must be non-NULL (asserted).

  Returns UN_MODIF_SUCCESS if a modification was removed and freed,
  UN_MODIF_NO_MATCH if the polymer is modified but with a
  modification of another name than 'modif', UN_MODIF_NO_MODIF if the
  polymer bears no left end modification at all.
*/
gboolean
pxmchem_polymer_LE_un_modify (PxmPolymer *polymer, gchar *modif)
{
  PxmProp *prop = NULL;
  gboolean removed = FALSE;

  g_assert (polymer != NULL);

  prop = libpolyxmass_prop_find_prop (polymer->propGPA,
                                      NULL,
                                      NULL,
                                      "LEFT_END_MODIF",
                                      NULL,
                                      PXM_CMP_NO_DEEP);

  /* The polymer is not modified! */
  if (NULL == prop)
    return UN_MODIF_NO_MODIF;

  /* A specific modification was asked for, and it is not this one. */
  if (modif != NULL && 0 != strcmp (prop->data, modif))
    return UN_MODIF_NO_MATCH;

  /*
    The removal must be performed outside of g_assert (): when
    assertions are compiled out (G_DISABLE_ASSERT) the asserted
    expression is not evaluated, and the prop freed below would
    remain in the array as a dangling pointer.
  */
  removed = g_ptr_array_remove (polymer->propGPA, prop);
  g_assert (TRUE == removed);

  libpolyxmass_prop_free (prop);

  return UN_MODIF_SUCCESS;
}


/*
  Returns
  UN_MODIF_SUCCESS or
  UN_MODIF_NO_MATCH or
  UN_MODIF_NO_MODIF or
  UN_MODIF_FAILURE.
*/
/*
  Remove the right end modification of 'polymer', that is its
  "RIGHT_END_MODIF" prop object.

  If 'modif' is non-NULL, the modification is removed only if its
  name (the data of the prop object) is identical to 'modif'. If
  'modif' is NULL, whatever right end modification is found is
  removed.

  'polymer' must be non-NULL (asserted).

  Returns UN_MODIF_SUCCESS if a modification was removed and freed,
  UN_MODIF_NO_MATCH if the polymer is modified but with a
  modification of another name than 'modif', UN_MODIF_NO_MODIF if the
  polymer bears no right end modification at all.
*/
gboolean
pxmchem_polymer_RE_un_modify (PxmPolymer *polymer, gchar *modif)
{
  PxmProp *prop = NULL;
  gboolean removed = FALSE;

  g_assert (polymer != NULL);

  prop = libpolyxmass_prop_find_prop (polymer->propGPA,
                                      NULL,
                                      NULL,
                                      "RIGHT_END_MODIF",
                                      NULL,
                                      PXM_CMP_NO_DEEP);

  /* The polymer is not modified! */
  if (NULL == prop)
    return UN_MODIF_NO_MODIF;

  /* A specific modification was asked for, and it is not this one. */
  if (modif != NULL && 0 != strcmp (prop->data, modif))
    return UN_MODIF_NO_MATCH;

  /*
    The removal must be performed outside of g_assert (): when
    assertions are compiled out (G_DISABLE_ASSERT) the asserted
    expression is not evaluated, and the prop freed below would
    remain in the array as a dangling pointer.
  */
  removed = g_ptr_array_remove (polymer->propGPA, prop);
  g_assert (TRUE == removed);

  libpolyxmass_prop_free (prop);

  return UN_MODIF_SUCCESS;
}


/*
  UTILITY FUNCTIONS
*/
/*
  Compute the monomeric composition of the [start_idx, end_idx]
  region of the sequence of 'polymer' and store it in 'GPA'.

  For each monomer iterated in the region, 'GPA' is searched for a
  monomer of the same code:

  - if none is found, a new monomer is created by calling
    pxmchem_monomer_new_by_code () with the code and 'mnm_refGPA',
    it receives a prop object of name "COUNT" whose data is a newly
    allocated gint of value 1, and it is appended to 'GPA';

  - if one is found, the gint of its "COUNT" prop object is
    incremented by 1.

  In both cases, if the sequence monomer bears a prop object of name
  "MODIF", a deep copy of that prop object is appended to the array
  of prop objects of the "composition" monomer.

  'GPA' is not emptied first: monomers that are already in it take
  part in the lookup (and must thus have a "COUNT" prop object).

  Both indices are inclusive. A 'start_idx' that is negative or past
  the last monomer is replaced with 0; an 'end_idx' that is negative
  or past the last monomer (for example equal to the length of the
  polymer, as is the case with a selection that extends to the right
  end of the sequence) is replaced with the index of the last
  monomer.

  'polymer', its monomerGPA member, 'mnm_refGPA' and 'GPA' must be
  non-NULL (asserted).

  Returns the number of monomers in 'GPA' upon completion.
*/
gint
pxmchem_polymer_composition (PxmPolymer *polymer,
                             GPtrArray *mnm_refGPA,
                             GPtrArray *GPA,
                             gint start_idx, gint end_idx)
{
  PxmMonomer *monomer = NULL;
  PxmMonomer *iter_monomer = NULL;
  PxmMonomer *jter_monomer = NULL;

  PxmProp *prop = NULL;
  PxmProp *new_prop = NULL;

  gboolean processed = FALSE;

  gint len = 0;
  gint iter = 0;
  gint jter = 0;

  gint start_index = start_idx;
  gint end_index = end_idx;


  g_assert (polymer != NULL);
  g_assert (polymer->monomerGPA != NULL);
  g_assert (mnm_refGPA != NULL);
  g_assert (GPA != NULL);

  len = (gint) polymer->monomerGPA->len;

  if (start_idx < 0 || start_idx >= len)
    start_index = 0;

  if (end_idx >= len || end_idx < 0)
    end_index = len - 1;

  /*
    Iterate with the sanitized indices (and not with the raw
    parameters), so that the polymer sequence array is never indexed
    out of its bounds. With an empty sequence end_index is -1 and the
    loop is not entered.
  */
  for (iter = start_index; iter <= end_index; iter++)
    {
      processed = FALSE;

      iter_monomer = g_ptr_array_index (polymer->monomerGPA, iter);

      /*
        Now see if this monomer has already been encountered.
      */
      for (jter = 0; jter < (gint) GPA->len; jter++)
        {
          jter_monomer = g_ptr_array_index (GPA, jter);

          if (0 != strcmp (iter_monomer->code, jter_monomer->code))
            continue;

          /*
            The monomer was found, which means that we already had
            encountered it at least once before. We simply have to
            increment its "COUNT" property data by 1.
          */
          prop = libpolyxmass_prop_find_prop (jter_monomer->propGPA,
                                              NULL,
                                              NULL,
                                              "COUNT",
                                              NULL,
                                              PXM_CMP_NO_DEEP);
          g_assert (prop != NULL);

          (*((gint *) prop->data))++;

          /*
            Attention : if the monomer from the polymer sequence is
            modified with a property "MODIF" we have to __duplicate__
            this property and append it to the GPtrArray of prop
            objects of the "composition" monomer.
          */
          prop = libpolyxmass_prop_find_prop (iter_monomer->propGPA,
                                              NULL,
                                              NULL,
                                              "MODIF",
                                              NULL,
                                              PXM_CMP_NO_DEEP);
          if (prop != NULL)
            {
              new_prop = libpolyxmass_prop_dup (prop, PXM_DUP_DEEP);

              g_ptr_array_add (jter_monomer->propGPA, new_prop);
            }

          /*
            Let the outer loop know that the monomer was processed.
          */
          processed = TRUE;
          break;
        }

      if (processed == TRUE)
        continue;

      /*
        The iter_monomer was not found in the GPA, which means that
        it is the very first time that it is encountered. Thus we
        have to create a new monomer and set to this new monomer a
        prop object of name "COUNT" and of data 1.
      */
      monomer = pxmchem_monomer_new_by_code (iter_monomer->code,
                                             mnm_refGPA);

      prop = libpolyxmass_prop_new ();
      libpolyxmass_prop_set_name (prop, "COUNT");

      /*
        Allocate a gint in the prop. No special prop freeing
        function is registered here: the original author notes that
        the default g_free () is OK to free the allocated gint.
      */
      prop->data = g_malloc0 (sizeof (gint));
      (*((gint *) prop->data)) = 1;

      g_ptr_array_add (monomer->propGPA, prop);

      /*
        Same as above: duplicate the "MODIF" property, if any, into
        the "composition" monomer.
      */
      prop = libpolyxmass_prop_find_prop (iter_monomer->propGPA,
                                          NULL,
                                          NULL,
                                          "MODIF",
                                          NULL,
                                          PXM_CMP_NO_DEEP);
      if (prop != NULL)
        {
          new_prop = libpolyxmass_prop_dup (prop, PXM_DUP_DEEP);

          g_ptr_array_add (monomer->propGPA, new_prop);
        }

      /*
        Finally append this monomer object to the GPtrArray passed as
        param.
      */
      g_ptr_array_add (GPA, monomer);
    }

  return GPA->len;
}


gchar *
pxmchem_polymer_make_codes_string (PxmPolymer *polymer,
                                   gint start_idx, gint end_idx)
{
  /*
    The polymer and its array of monomers must exist; the actual work
    is delegated to the monomer-array function, the result of which
    is returned unchanged.
  */
  g_assert (NULL != polymer);
  g_assert (NULL != polymer->monomerGPA);

  return pxmchem_monomer_make_codes_string_by_idces (polymer->monomerGPA,
                                                     start_idx, end_idx);
}


gint
pxmchem_polymer_calculate_length_from_string (gchar* seq,
                                              gboolean check_codes,
                                              gint codelen,
                                              GPtrArray *mnm_refGPA)
{
  /*
    The length of a polymer sequence in string format is the number
    of monomers in it: simply forward all the parameters to the
    monomer-counting function.
  */
  gint length = pxmchem_monomer_count_from_string (seq, check_codes,
                                                   codelen, mnm_refGPA);

  return length;
}


/*
  Return a newly allocated copy of POLYMER_SEQUENCE_XML_VERSION.
  The caller owns the returned string and must g_free () it.
*/
gchar *
pxmchem_polymer_get_current_xml_version (void)
{
  return g_strdup_printf ("%s", POLYMER_SEQUENCE_XML_VERSION);
}



/*
  Text-format TRANSACTIONS
*/

/***** XML-format *****/
/*
  Read the polymer type out of the polymer sequence xml file 'file'.

  The type of polymer in a polymer sequence xml file is located in
  the <type> element, as shown below:

  <polseqdata version="0.1">
  <polseqinfo>
  <type>protein</type>
  <name>hamster P-glycoprotein</name>
  <code>HAM_P90065</code>

  Thus, we go straight to the <type> element: root <polseqdata>,
  its first non-blank child <polseqinfo>, and the first non-blank
  child of that one, <type>.

  'file' must not be NULL.

  Returns the text of the <type> element as an allocated string that
  the caller must free when done with it, or NULL if the file does
  not exist, is not parseable xml, or does not have the expected
  structure (a critical message is logged in these cases). NULL is
  also returned, without any message, if xmlNodeListGetString ()
  yields no text for the <type> element.
*/
gchar *
pxmchem_polymer_get_type_from_xml_file (gchar *file)
{
  xmlDocPtr xml_doc = NULL;
  xmlNodePtr xml_node = NULL;
  
  gchar *type = NULL;

  g_assert (file != NULL);
  
  if (FALSE == g_file_test (file, G_FILE_TEST_EXISTS))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file not found: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      return NULL;
    }
      
  /*
    The very first thing we can do is check that the file is an
    XML file: <?xml version="1.0"?> should be the first item in the
    file.
  */
  if (FALSE == libpolyxmass_globals_check_xml_file (file))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file is not valid xml format: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      return NULL;
    }
  
  /*
    Build an XML tree from the file.
  */
  xml_doc = xmlParseFile (file);

  if (xml_doc == NULL)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file is not valid xml format: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      return NULL;
    }

  /*
    Check if the document is of the right kind.
  */
  xml_node = xmlDocGetRootElement (xml_doc);

  if (xml_node == NULL)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file is an empty xml file: '%s'\n"),
	     __FILE__, __LINE__, file);

      xmlFreeDoc (xml_doc);
      
      return NULL;
    }

  if (0 != strcmp ((gchar *) xml_node->name, "polseqdata"))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file is of wrong type, "
	     "root node is not  \"polseqdata\": '%s'\n"),
	     __FILE__, __LINE__, file);
      
      xmlFreeDoc (xml_doc);
      
      return NULL;
    }
  
  /*
    We are at the root of the document, we have to go one step
    further to get our hands on the <polseqinfo> element.
  */
  xml_node = xml_node->children;

  /*
    From a rigorous XML parsing point of view, the blanks found in
    the XML document are considered to be nodes, and we have to skip
    them.
  */
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /*
    The loop above stops with a NULL pointer when the root element
    has no non-blank child at all, so the pointer must be checked
    before its name is looked at.
  */
  if (xml_node == NULL
      || 0 != strcmp ("polseqinfo", (gchar *) xml_node->name))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file is a badly formed xml file: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      xmlFreeDoc (xml_doc);
      
      return NULL;
    }

  /*
    The DTD stipulates that the first child of the <polseqinfo>
    element has to be the <type> element, so we just ask for it.
  */
  xml_node = xml_node->children;

  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /*
    Same remark as above: an empty <polseqinfo> element leaves us
    with a NULL pointer here.
  */
  if (xml_node == NULL
      || 0 != strcmp ((gchar *) xml_node->name, "type"))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: failed to search the polymer type; DTD error?: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      xmlFreeDoc (xml_doc);
      
      return NULL;
    }

  type = (gchar *) xmlNodeListGetString (xml_doc, 
					 xml_node->xmlChildrenNode, 1);

  xmlFreeDoc (xml_doc);

  /* 
     That is an allocated string that should be freed when the caller
     has finished with it.
  */
  return type;
}


gchar *
pxmchem_polymer_format_xml_string_DTD (void)
{
  gchar *result = NULL;
  
  gchar *DTD = 
    "<?xml version=\"1.0\"?>\n"
    "<!-- DTD for polymer sequences, used by the\n"
    "'GNU polyxmass' suite of mass spectrometry applications.\n"
    "Copyright 2003, 2004 Filippo Rusconi - Licensed under the GNU GPL -->\n"
    "<!DOCTYPE polseqdata [\n"
    "<!ATTLIST polseqdata version CDATA #REQUIRED>\n"
    "<!ELEMENT polseqdata (polseqinfo,polseq,prop*)>\n"
    "<!ELEMENT polseqinfo (type,name,code,author,date)>\n"
    "<!ELEMENT polseq (codes|monomer)*>\n"
    "<!ELEMENT monomer (code, prop*)>\n"
    "<!ELEMENT prop (name, data+)>\n"
    "<!ELEMENT date (year, month, day)>\n"
    "<!ELEMENT codes (#PCDATA)>\n"
    "<!ELEMENT type (#PCDATA)>\n"
    "<!ELEMENT name (#PCDATA)>\n"
    "<!ATTLIST data type (str | int | dbl) \"str\">\n"
    "<!ELEMENT data (#PCDATA)>\n"
    "<!ELEMENT code (#PCDATA)>\n"
    "<!ELEMENT author (#PCDATA)>\n"
    "<!ELEMENT year (#PCDATA)>\n"
    "<!ELEMENT month (#PCDATA)>\n"
    "<!ELEMENT day (#PCDATA)>\n"
    "]>\n";

  result = g_strdup (DTD);
  
  return result;
}
  



/*
  Format 'polymer' as one single xml string: the DTD first, then the
  <polseqdata> element containing, in this order, the <polseqinfo>
  element (from polymer->plminfo), the <polseq> element (from
  polymer->monomerGPA) and one xml fragment per polymer prop (from
  polymer->propGPA) for which an xml formatter plugin is registered.

  The string that is created looks like this (after the DTD):

  <polseqdata version="0.1">
  <polseqinfo>
  <type>protein</type>
  <name>hamster P-glycoprotein</name>
  <code>HAM_P90065</code>
  <author>rusconi</author>
  <date>
  <year>2003</year>
  <month>03</month>
  <day>09</day>
  </date>
  </polseqinfo>
  <polseq>
  <codes>MEFEEDWYGT</codes>
  <monomer>
  <code>S</code>
  <prop>
  <name>MODIF</name>
  <data>Phosphorylation</data>
  </prop>
  <prop>
  <name>COMMENT</name>
  <data>Phosphorylation is only partial</data>
  </prop>
  </monomer>
  <codes>LVISKE</codes>
  <monomer>
  <code>K</code>
  <prop>
  <name>MODIF</name>
  <data>Acetylation</data>
  </prop>
  <prop>
  <name>NOTE</name>
  <data>COMMENT</data>
  <data type="str">The acetylation is only partial.</data>
  </prop>
  </monomer>
  <codes>RLRYMVFK</codes>
  </polseq>
  <prop>
  <name>COMMENT</name>
  <data>this polymer is partly membranous</data>
  </prop>
  <prop>
  <name>LEFT_END_MODIF</name>
  <data>Acetylation</data>
  </prop>
  <prop>
  <name>RIGHT_END_MODIF</name>
  <data>Phosphorylation</data>
  </prop>
  </polseqdata>

  'indent' and 'offset' are handed to
  libpolyxmass_globals_format_string_lead () to compute the leading
  string of the <polseqdata> tags; the children elements are
  formatted with 'offset' + 1.

  'polymer', its plminfo, monomerGPA and propGPA members, and
  'indent' must not be NULL.

  Returns a newly allocated string that the caller must g_free ().
*/
gchar *
pxmchem_polymer_format_xml_string_polseqdata (PxmPolymer *polymer, 
					      gchar *indent, gint offset)
{
  guint iter = 0;
  gint new_offset = offset + 1;
  
  gchar *lead = NULL;
  gchar *help = NULL;
  
  GString *gs = NULL;
  
  PxmProp *prop = NULL;
  
  g_assert (polymer != NULL);
  g_assert (indent != NULL);
  g_assert (polymer->plminfo != NULL);
  g_assert (polymer->monomerGPA != NULL);
  g_assert (polymer->propGPA != NULL);
  
  gs = g_string_new ("");
  
  /*
    First start with the DTD string.
  */
  help = pxmchem_polymer_format_xml_string_DTD ();
  g_assert (help != NULL);
  
  g_string_append (gs, help);
  g_free (help);
  
  /*
    Open the <polseqdata version="0.1"> node.
  */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);
  
  g_string_append_printf (gs, "%s<polseqdata version=\"%s\">\n", 
			  lead, POLYMER_SEQUENCE_XML_VERSION);
  
  g_free (lead);
  
  /*
    The <polseqinfo> element.
  */
  help = pxmchem_polymer_format_xml_string_polseqinfo (polymer->plminfo,
						       indent, new_offset);
  g_assert (help != NULL);
  g_string_append (gs, help);
  g_free (help);
  
  /*
    The <polseq> element.
  */
  help = 
    pxmchem_polymer_format_xml_string_polseq_with_mnm_GPA (polymer->
							   monomerGPA,
							   indent, 
							   new_offset);
  g_assert (help != NULL);
  g_string_append (gs, help);
  g_free (help);
  
  /*
    And finally deal with the polymer prop instances. The code below
    iterates in the props' array of the polymer instance and formats
    each prop according to its specific xml-formatting plugin.
  */
  for (iter = 0; iter < polymer->propGPA->len; iter++)
    {
      prop = g_ptr_array_index (polymer->propGPA, iter);
      g_assert (prop != NULL);
      
      /*
	The chosen plugin is stored in the global function pointer,
	exactly like it has always been, in case other code reads it.
      */
      libpolyxmass_prop_formatter_xml_prop_plugin =
	libpolyxmass_prop_choose_format_xml_prop_plugin (prop->name);

      if (libpolyxmass_prop_formatter_xml_prop_plugin == NULL)
	{
	  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_MESSAGE,
		 _("%s@%d: prop: '%s' has no registered xml formatter plugin\n"),
		 __FILE__, __LINE__, prop->name);

	  continue;
	}
      
      help = libpolyxmass_prop_formatter_xml_prop_plugin (prop, 
							  indent,
							  new_offset,
							  NULL);
      
      if (help == NULL)
	{
	  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
		 _("%s@%d: failed to format xml string for polymer "
		 "prop: '%s' with plugin\n"),
		 __FILE__, __LINE__, prop->name);

	  continue;
	}
      
      /*
	In some cases, we want to bypass a given prop processing
	(like the monomer chembridge prop, for example): the plugin
	then returns the "non applicable" "N/A" pattern, that must
	not be appended to the elongating GString.
      */
      if (0 != strcmp ("N/A", help))
	g_string_append (gs, help);
      
      g_free (help);
      help = NULL;
    }
  
  /*
    At this point we have finished formatting the xml string for node
    <polseqdata version="0.1">. Close this element and return the
    string.
  */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);

  g_string_append_printf (gs, "%s</polseqdata>\n", lead);
  
  g_free (lead);
  
  /*
    Free the GString wrapper only: the character data is handed over
    to the caller.
  */
  return g_string_free (gs, FALSE);
}


/*
  Format a <polseqinfo> xml element out of 'plminfo'. The element
  looks like this:

  <polseqinfo>
  <type>protein</type>
  <name>hamster P-glycoprotein</name>
  <code>HAM_P90065</code>
  <author>rusconi</author>
  <date>
  <year>2003</year>
  <month>03</month>
  <day>09</day>
  </date>
  </polseqinfo>

  - <type> is plminfo->type, which must be a non-empty string.
  - <name> and <code> are plminfo->name and plminfo->code. If one of
    these is NULL or empty, it is replaced IN 'plminfo' with a
    translated placeholder string before being written.
  - <author> is the name returned by g_get_user_name () and <date>
    is the current date as broken down by gmtime (); the author and
    date members of 'plminfo' are neither read nor modified.

  'indent' and 'offset' are handed to
  libpolyxmass_globals_format_string_lead () to compute the leading
  string of each line; children are formatted one offset deeper than
  their parent.

  NOTE(review): the values are written verbatim, without xml
  escaping of characters like '&' or '<' -- to be confirmed whether
  callers guarantee that these cannot occur.

  Returns a newly allocated string that the caller must g_free ().
*/
gchar *
pxmchem_polymer_format_xml_string_polseqinfo (PxmPlminfo *plminfo, 
					      gchar *indent, gint offset)
{
  gint new_offset = 0;
  
  gchar *lead = NULL;
  const gchar *author = NULL;
  
  GString *gs = NULL;

  time_t the_time;
  struct tm *date_elements = NULL;
  gchar date_element [80] = "";
    
  g_assert (plminfo != NULL && indent != NULL);
  g_assert (plminfo->type != NULL 
	    && strlen (plminfo->type) > 0);
  
  gs = g_string_new ("");

  /*
    Open the <polseqinfo> element and insert the data.
  */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);
  g_string_append_printf (gs, "%s<polseqinfo>\n", lead);
  g_free (lead);

  new_offset = offset + 1;
  lead = libpolyxmass_globals_format_string_lead (indent, new_offset);
  
  g_string_append_printf (gs, "%s<type>%s</type>\n", lead, plminfo->type);
  
  /*
    The name of the polymer is not formally required, so we have to
    check this carefully. An empty (but allocated) string is freed
    before being replaced, so that it is not leaked. The placeholder
    is duplicated with g_strdup () because a translated message must
    not be interpreted as a printf-like format.
  */
  if (plminfo->name == NULL || plminfo->name [0] == '\0')
    {
      g_free (plminfo->name);
      plminfo->name = g_strdup (_("Name not set"));
    }
  g_string_append_printf (gs, "%s<name>%s</name>\n", lead, plminfo->name);

  /*
    The code of the polymer is not formally required either: same
    treatment as for the name.
  */
  if (plminfo->code == NULL || plminfo->code [0] == '\0')
    {
      g_free (plminfo->code);
      plminfo->code = g_strdup (_("Code not set"));
    }
  g_string_append_printf (gs, "%s<code>%s</code>\n", lead, plminfo->code);
  
  /*
    The author written to the string is the current user. Function
    g_get_user_name () does not allocate the string, so there is
    nothing to free.
  */
  author = g_get_user_name ();
  g_assert (author != NULL);
  g_string_append_printf (gs, "%s<author>%s</author>\n", lead, author);
  
  /*
    Special case of the date: we use the current date. gmtime () may
    return NULL, and strftime () must not be called in that case.
  */
  time (&the_time);
  date_elements = gmtime (&the_time);
  g_assert (date_elements != NULL);

  /*
    Open the <date> element with a newline after it.
  */
  g_string_append_printf (gs, "%s<date>\n", lead);
  g_free (lead);

  /*
    Increment the offset, because the <year> (and month and day)
    elements are indented with respect to the <date> element.
  */
  new_offset = new_offset + 1;
  lead = libpolyxmass_globals_format_string_lead (indent, new_offset);

  strftime (date_element, sizeof date_element, "%Y", date_elements);
  g_string_append_printf (gs, "%s<year>%s</year>\n", lead, date_element);

  strftime (date_element, sizeof date_element, "%m", date_elements);
  g_string_append_printf (gs, "%s<month>%s</month>\n", lead, date_element);

  strftime (date_element, sizeof date_element, "%d", date_elements);
  g_string_append_printf (gs, "%s<day>%s</day>\n", lead, date_element);

  g_free (lead);

  /*
    Back to the <date> level to close that element.
  */
  new_offset = new_offset - 1;
  lead = libpolyxmass_globals_format_string_lead (indent, new_offset);
  g_string_append_printf (gs, "%s</date>\n", lead);
  g_free (lead);

  /*
    Finally we can close the <polseqinfo> node.
  */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);
  g_string_append_printf (gs, "%s</polseqinfo>\n", lead);
  g_free (lead);
  
  /*
    Free the GString wrapper only: the character data is handed over
    to the caller.
  */
  return g_string_free (gs, FALSE);
}



gchar *
pxmchem_polymer_format_xml_string_polseq_with_mnm_GPA (GPtrArray *GPA, 
						       gchar *indent, 
						       gint offset)
{
  gint new_offset = 0;

  gint iter = 0;
    
  gchar *lead = NULL;
  gchar *help = NULL;
  
  GString *gs = NULL;
  GString *gs_codes = NULL;

  PxmProp *prop = NULL;

  PxmMonomer *monomer = NULL;
  
  
  g_assert (GPA != NULL && indent != NULL);
  
  gs = g_string_new ("");

  /*
    We are willing to create a <polseq> node that should 
    look like this:
   
    <polseq>
    <codes>MEFEEDWYGEEDWYGTEEDWYGTEEDWYGTEEDWYGTEEDWYGTEEDWYGT</codes>
    <monomer>
    <code>S</code>
    <prop>
    <name>MODIF</name>
    <data>Phosphorylation</data>
    </prop>
    <prop>
    <name>COMMENT</name>
    <data>Phosphorylation is only partial</data>
    </prop>
    </monomer>
    <codes>LVISKE</codes>
    <monomer>
    <code>K</code>
    <prop>
    <name>MODIF</name>
    <data>Acetylation</data>
    </prop>
    <prop>
    <name>COMMENT</name>
    <data>This lysil acetylation is fake!</data>
    </prop>
    </monomer>
    <codes>RLRYMVFK</codes>
    </polseq>
  */
  
  
  /*
    Open the <polseq> element and insert the data.
  */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);

  g_string_append_printf (gs, "%s<polseq>\n", lead);

  g_free (lead);

  new_offset = offset + 1;
  lead = libpolyxmass_globals_format_string_lead (indent, new_offset);
  
  /*
    The process here is simple: we iterate in the monomerGPA passed
    as parameter and for each monomer we check if it has a "MODIF" or
    "COMMENT" or... property associated with it. If so, we create a
    brand new <code> element to display fully the prop data. If not,
    we increment a 'codes' variable that holds all the codes of full
    stretches of monomer codes that have no property associated with
    them. See the example above.
  */

  for (iter = 0; iter < GPA->len; iter++)
    {
      monomer = g_ptr_array_index (GPA, iter);
      g_assert (monomer != NULL);
      
      /*
	Check if current monomer has properties in its prop array.
      */
      if (monomer->propGPA->len <= 0)
	{
	  /*
	    Monomer has no prop objects, so we just append to the
	    elongating gs the code of this monomer. However, if have
	    here the first code ever, the stretch gs does not exist,
	    so we first have to construct it. Otherwise just make an
	    append.
	  */
	  if (gs_codes == NULL)
	    {
	      gs_codes = g_string_new ("");
	      g_string_append_printf (gs_codes, "%s<codes>%s", 
				      lead, monomer->code);
	    }
	  else
	    g_string_append_printf (gs_codes, "%s", monomer->code);
	  
	  continue;
	}
      
      /*
	Apparently, if we are here, that means that there is at least
	one prop instance in the monomer->propGPA array of prop
	instances. We must deal with it (or them).
      */
      help = pxmchem_monomer_format_xml_string_monomer (monomer,
							indent, new_offset);
      
      if (help == NULL)
	{
	  /*
	    Apparently we failed foramtting an xml string for monomer, 
	    so we just fall back to the gs_codes stuff (see above).
	  */
	  if (gs_codes == NULL)
	    {
	      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
		     _("%s@%d: plugin prop of name: '%s' has no registered "
		     "xml formatter... using monomer code only.\n"),
		     __FILE__, __LINE__, prop->name);
	      
	      gs_codes = g_string_new ("");
	      g_string_append_printf (gs_codes, "%s<codes>%s", 
				      lead, monomer->code);
	    }
	  else
	    g_string_append_printf (gs_codes, "%s", monomer->code);
	  
	  continue;
	}
      else
	{
	  /*
	    A full <code> node xml string has been successfully
	    formatted. This means that, if a <codes> element was
	    running, we must first close it, appends the contents of
	    the gs_codes GString to the elongating gs GString and
	    free gs_codes. Finally we append to gs the help string.
	  */
	  if (gs_codes != NULL)
	    {
	      /*
		Close the <codes> element.
	      */
	      gs_codes = g_string_append (gs_codes, "</codes>\n");
	      
	      /*
		Append to main GString 'gs' the contents of 'gs_codes'
		and free the 'gs_codes' GString.
	      */
	      g_string_append_printf (gs, "%s", gs_codes->str);

	      g_string_free (gs_codes, TRUE);
	      gs_codes = NULL;
	    }
	  
	  /*
	    Append the help xml formatted string corresponding to
	    the currently iterated monomer and free it.
	  */
	  g_string_append_printf (gs, "%s", help);
	  g_free (help);
	}
    }
  /*
    End of:
    for (iter = 0; iter < GPA->len; iter++)
  */

  /*
    Note that even if we went out of the for loop above, there might
    still be interesting data pending if gs_codes. So we have to
    check this. If gs_codes is not NULL, then that means that it was
    not flushed in a previous for loop round, which means it has date
    in it.
  */
  if (gs_codes != NULL)
    {
      /*
	Close the <codes> element.
      */
      gs_codes = g_string_append (gs_codes, "</codes>\n");
      
      /*
	Append to main GString gs the contents of gs_codes
	and free the gs_codes GString.
      */
      g_string_append_printf (gs, "%s", gs_codes->str);

      g_string_free (gs_codes, TRUE);
      gs_codes = NULL;
    }
  
  /*
    We have finished iterating in the GPA array of monomers, and
    thus finished formatting the xml string corresponding to the
    <polseq> element. Close this element and return the formatted
    <polseq> string.
  */
  
  g_free (lead);

  lead = libpolyxmass_globals_format_string_lead (indent, offset);

  g_string_append_printf (gs, "%s</polseq>\n", indent);
  
  g_free (lead);
  
  help = gs->str;
  
  g_string_free (gs, FALSE);
  
  return help;
}



/*
  Write the polymer sequence 'polymer' to the file 'file', as an xml
  document formatted by
  pxmchem_polymer_format_xml_string_polseqdata () with an indent of
  two spaces and an initial offset of 0. The file is opened in "w"
  mode.

  On success, polymer->plminfo->file is set to 'file' by way of
  pxmchem_polymer_plminfo_set_file (), unless it already is the
  very same pointer.

  'polymer' and 'file' must not be NULL. 'user_data' is not used.

  Returns the length of the xml string that was written, or -1 if
  the file could not be opened, written or closed properly (a
  critical message is logged in these cases).
*/
gint
pxmchem_polymer_write_xml_file (PxmPolymer *polymer, gchar *file, 
				gpointer user_data)
{
  gint result = -1;

  gsize size = 0;
  
  gboolean failed = FALSE;
  
  gchar *xml = NULL;
      
  FILE *filep = NULL;


  g_assert (polymer != NULL);
  g_assert (file != NULL);
  

  /*
    First of all make sure we can open the file to write to it.
  */
  filep = fopen (file, "w");

  if (filep == NULL)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: failed to open file for writing: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      return -1;
    }
  
  /*
    Construct a string with all the xml-formatted data pertaining
    to the polymer sequence 'polymer'.
  */
  xml = pxmchem_polymer_format_xml_string_polseqdata (polymer,
						      "  ", 0);
  g_assert (xml != NULL);

  /*
    Copy the xml data string to the file 'file'. fputs () returns a
    non-negative number on success, or EOF on error. The data might
    only be really written when the stream is flushed by fclose (),
    so a failure there means that the file is incomplete and must be
    reported also.
  */
  if (fputs (xml, filep) < 0)
    failed = TRUE;

  if (fclose (filep) != 0)
    failed = TRUE;
  
  if (failed == TRUE)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: failed to save polymer sequence to file: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      g_free (xml);
      
      return -1;
    }
  
  /*
    Since we could successfully write this sequence to the file of
    name passed as parameter, we can set the file member of the
    plminfo structure to that file name. This way the calling
    function can use this file name to update the title of the window
    of the saved sequence. Also, this is useful because if this
    sequence is to be saved again, then it knows already to what file
    it should be saved. 

    Note, however, that because of the way the 'set_file' function
    below works (freeing polymer->plminfo->file if non-NULL), we
    would free the 'file' parameter itself if 'file' and
    'polymer->plminfo->file' were the same pointer (we save a polymer
    sequence file over itself), and the file name would be corrupt
    for the next save. So we have to call this function
    conditionally:
  */
  if (polymer->plminfo->file != file)
    pxmchem_polymer_plminfo_set_file (polymer->plminfo, file);     

  /*
    The value returned is the length of the xml string.
  */  
  size = strlen (xml);
  g_assert (size < G_MAXINT);
  result = (gint) size;
  
  g_free (xml);

  return result;
}
  

/*
  Write to the file 'file' a polymer sequence xml document that
  contains the data of 'plminfo' only: the DTD, then a <polseqdata>
  element with the <polseqinfo> element formatted out of 'plminfo'
  and a <polseq> element formatted out of an empty array of
  monomers. The file is opened in "w" mode; the indent is two spaces
  and the initial offset is 0.

  'plminfo' and 'file' must not be NULL. 'user_data' is not used.

  Returns the length of the xml string that was written, or -1 if
  the file could not be opened, written or closed properly (a
  critical message is logged in these cases).
*/
gint
pxmchem_plminfo_write_xml_file (PxmPlminfo *plminfo, gchar *file, 
				gpointer user_data)
{
  gint result = -1;

  FILE *filep = NULL;

  /*
    xml formatting-related variables.
  */
  gchar indent [] = "  ";
  gint offset = 0;
  gint new_offset = 0;
  
  gsize size = 0;
  
  gboolean failed = FALSE;
  
  gchar *lead = NULL;
  gchar *xml = NULL;
  
  GPtrArray *fakeGPA = NULL;
  
  GString *gs = NULL;
  

  g_assert (plminfo != NULL);
  g_assert (file != NULL);

  /*
    First of all make sure we can open the file to write to it.
  */
  filep = fopen (file, "w");

  if (filep == NULL)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: failed to open file for writing: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      return -1;
    }
  
  gs = g_string_new ("");

  /*
    First start with the DTD string.
  */
  xml = pxmchem_polymer_format_xml_string_DTD ();
  g_assert (xml != NULL);
  
  g_string_append (gs, xml);
  g_free (xml);
  

  /*
    Open the <polseqdata version="0.1"> node.
  */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);
  
  g_string_append_printf (gs, "%s<polseqdata version=\"%s\">\n", 
			  lead, POLYMER_SEQUENCE_XML_VERSION);
  
  g_free (lead);

  new_offset = offset + 1;
  
  /*
    The <polseqinfo> element.
  */
  xml = pxmchem_polymer_format_xml_string_polseqinfo (plminfo,
						      indent, new_offset);
  g_assert (xml != NULL);
  g_string_append (gs, xml);
  g_free (xml);
  
  /*
    To construct the <polseq> element we need an array of monomers. 
    We do not have it, but we can create an empty one, representative
    of a fake polymer sequence.
  */
  fakeGPA = g_ptr_array_new ();
  
  xml = 
    pxmchem_polymer_format_xml_string_polseq_with_mnm_GPA (fakeGPA,
							   indent, 
							   new_offset);
  g_assert (xml != NULL);
  g_string_append (gs, xml);
  g_free (xml);

  g_ptr_array_free (fakeGPA, TRUE);
    
  /*
    At this point we have finished writing the pseudo polseq stuff.
    All we need to do is close the <polseqdata version="0.1"> element.
  */
  lead = libpolyxmass_globals_format_string_lead (indent, offset);

  g_string_append_printf (gs, "%s</polseqdata>\n", lead);
  
  g_free (lead);
  
  /*
    Free the GString wrapper only: we keep the character data.
  */
  xml = g_string_free (gs, FALSE);
  

  /*
    Copy the xml data string to the file 'file'. fputs () returns a
    non-negative number on success, or EOF on error. The data might
    only be really written when the stream is flushed by fclose (),
    so a failure there means that the file is incomplete and must be
    reported also.
  */
  if (fputs (xml, filep) < 0)
    failed = TRUE;

  if (fclose (filep) != 0)
    failed = TRUE;
  
  if (failed == TRUE)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: failed to save polymer sequence to file: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      g_free (xml);

      return -1;
    }
  
  /*
    The value returned is the length of the xml string. Note that,
    by necessity, the xml string cannot be 0-length, because we have
    explicitly written things into it in this precise function.
  */  
  size = strlen (xml);
  g_assert (size < G_MAXINT);
  result = (gint) size;

  g_free (xml);

  return result;
}




PxmPlminfo *
pxmchem_polymer_render_xml_node_polseqinfo (xmlDocPtr xml_doc,
					    xmlNodePtr xml_node,
					    gpointer user_data)
{
  /*
    The xml node we are in is structured this way:
    
    <polseqinfo>
    <type>protein</type>
    <name>hamster P-glycoprotein</name>
      
    And the xml_node parameter points to the 
    
    <polseqinfo> element tag:
    ^
    |
    +----- here we are right now.
     
    Which means that xml_node->name == "polseqinfo" and that
    we'll have to go one step down to the first child of the 
    current node in order to get to the <type> element.
    
    The xml element is defined this way:
    <!ELEMENT polseqinfo (type,name,code,author,date)>
    
  */
  PxmPlminfo *plminfo;

  xmlNodePtr xml_child_node = NULL;

  /*
    Make sure we have parameters pointing bona fide to the right
    xml element.
  */
  g_assert (xml_node != NULL);
  g_assert (0 == strcmp ((gchar *) xml_node->name, "polseqinfo"));
  
  /*
    Now go to the first child of current node: <name>.
  */
  xml_node = xml_node->children;

  /*
    From a rigorous XML parsing point of view, the blanks found in
    the XML document are considered to be nodes, and we have to detect
    these and take proper action: go next sibling (next blank) as long
    as blanks are encountered.
  */
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;
  
  /*
    Check that we have effectively a <type> element here.
  */
  g_assert (0 == strcmp ((gchar *) xml_node->name, "type"));
  
  plminfo = pxmchem_plminfo_new ();
  
  /*
    Since we have allocated a plminfo instance right at the line 
    above, we can immediately set the member data without using 
    the _set_xxx ().
  */
  plminfo->type = 
    (gchar *) xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
  g_assert (plminfo->type != NULL);
  
  /*
    Go to next polseqinfo child.
  */
  xml_node = xml_node->next;
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /*
    Check that we have effectively a <name> element here.
  */
  g_assert (0 == strcmp ((gchar *) xml_node->name, "name"));

  plminfo->name = 
    (gchar *) xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
  g_assert (plminfo->name != NULL);

  /*
    Go to next polseqinfo child.
  */
  xml_node = xml_node->next;
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /*
    Check that we have effectively a <code> element here.
  */
  g_assert (0 == strcmp ((gchar *) xml_node->name, "code"));

  plminfo->code = 
    (gchar *) xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
  g_assert (plminfo->code != NULL);

  /*
    Go to next polseqinfo child.
  */
  xml_node = xml_node->next;
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /*
    Check that we have effectively a <author> element here.
  */
  g_assert (0 == strcmp ((gchar *) xml_node->name, "author"));

  plminfo->author = 
    (gchar *) xmlNodeListGetString (xml_doc, xml_node->xmlChildrenNode, 1);
  g_assert (plminfo->author != NULL);

  /*
    Go to next polseqinfo child, which should be <date>.
  */
  xml_node = xml_node->next;
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;

  /*
    Check that we have effectively a <date> element here.
  */
  g_assert (0 == strcmp ((gchar *) xml_node->name, "date"));

  /*
    Go the first child of the <date> element: <year>.
  */
  xml_child_node = xml_node->children;

  while (TRUE == xmlIsBlankNode (xml_child_node))
    xml_child_node = xml_child_node->next;
  
  g_assert (0 == strcmp ((gchar *) xml_child_node->name, "year"));
  
  plminfo->date_year = 
    (gchar *) xmlNodeListGetString (xml_doc, xml_child_node->xmlChildrenNode, 1);
  g_assert (plminfo->date_year != NULL);

  /*
    Go the second child of the <date> element: <month>.
  */
  xml_child_node = xml_child_node->next;
  while (TRUE == xmlIsBlankNode (xml_child_node))
    xml_child_node = xml_child_node->next;
  
  g_assert (0 == strcmp ((gchar *) xml_child_node->name, "month"));
  
  plminfo->date_month = 
    (gchar *) xmlNodeListGetString (xml_doc, xml_child_node->xmlChildrenNode, 1);
  g_assert (plminfo->date_month != NULL);

  /*
    Go the third child of the <date> element: <day>.
  */
  xml_child_node = xml_child_node->next;
  while (TRUE == xmlIsBlankNode (xml_child_node))
    xml_child_node = xml_child_node->next;
  
  g_assert (0 == strcmp ((gchar *) xml_child_node->name, "day"));
  
  plminfo->date_day = 
    (gchar *) xmlNodeListGetString (xml_doc, xml_child_node->xmlChildrenNode, 1);
  g_assert (plminfo->date_day != NULL);

  /*
    debug_printf (("the date is %s - %s -%s\n",
    plminfo->date_year,
    plminfo->date_month,
    plminfo->date_day));
  */
  
  /*
    Finished the rendering of the current <polseqinfo> node.
  */
  return plminfo;
}


gint
pxmchem_polymer_render_xml_node_polseq (xmlDocPtr xml_doc,
					xmlNodePtr xml_node,
					GPtrArray *fillGPA,
					GPtrArray *refGPA,
					gint codelen,
					gpointer user_data)
{
  /*
    Render a <polseq> xml node into monomer instances that get
    appended to 'fillGPA'.

    xml_doc:   the xml document that 'xml_node' belongs to.
    xml_node:  must point to the <polseq> element itself.
    fillGPA:   array that receives the rendered monomers.
    refGPA:    array of reference monomers, cannot be NULL.
    codelen:   passed untouched to the <codes> string parser.
    user_data: not used in this function.

    Returns the sum of the counts reported for each <codes> element
    plus one per <monomer> element, or -1 upon failure. Upon failure,
    whatever was already appended to 'fillGPA' is left there.

    The xml node we are in is structured this way:
    
    <polseq>
    <codes>MEFEEDF</codes>
    <monomer>
    <code>S</code>
    <prop>
    <name>MONOMER_MODIF</name>
    <data>Phosphorylation</data>
    </prop>
    <prop>
    <name>MONOMER_COMMENT</name>
    <data>Phosphorylation is only partial</data>
    </prop>
    </monomer>
    <codes>GRKDKNFLKMGRKSKKEKKEKKPVVSLYMLVG</codes>
    </polseq>
    
    And the xml_node parameter points to the 
    
    <polseq> element tag:
    ^
    |
    +----- here we are right now.
     
    Which means that xml_node->name == "polseq" and that
    we'll have to go one step down to the first child of the 
    current node in order to get to the <codes> or <monomer> elements.
    
    ATTENTION: inside of the <polseq> element, there can be ANY
    arbitrary number of both <codes> or <monomer> elements in
    succession, in whatever order.
    
    The DTD says this: <!ELEMENT polseq (codes|monomer)*>
  */

  PxmMonomer *monomer = NULL;

  gchar *help = NULL;

  gint count = -1;
  gint len = 0;


  /*
    The array of reference monomers cannot be NULL or empty.
  */
  g_assert (refGPA != NULL && refGPA->pdata != NULL);

  /*
    Make sure we have parameters pointing bona fide to the right
    xml element.
  */
  g_assert (xml_node != NULL);
  g_assert (0 == strcmp ((gchar *) xml_node->name, "polseq"));
  
  g_assert (fillGPA != NULL);
  
  /*
    Now go to the first child of current node: either <codes> or
    <monomer>. Once in the xml_node->children level, the number of
    <codes> or <monomer> elements encountered is arbitrary AND in
    whatever order. This is why we use first a while () loop, and
    second an if condition to select one case (<codes>) or the other
    (<monomer>).
  */
  xml_node = xml_node->children;

  /*
    From a rigorous XML parsing point of view, the blanks found in
    the XML document are considered to be nodes, and we have to detect
    these and take proper action: go next sibling (next blank) as long
    as blanks are encountered.
  */
  while (TRUE == xmlIsBlankNode (xml_node))
    xml_node = xml_node->next;
  
  /*
    There can be any number of <codes> and/or <monomer> elements in 
    whatever order, so the best is go with a while ().
  */
  while (xml_node != NULL)
    {
      if (0 == strcmp ((gchar *) xml_node->name, "codes"))
	{
	  help = 
	    (gchar *) xmlNodeListGetString (xml_doc, 
					    xml_node->xmlChildrenNode, 1);
	  
	  g_assert (help != NULL);
	  
	  count = pxmchem_monomer_fill_array_from_string (fillGPA,
							  help,  
							  codelen,
							  refGPA,
							  FALSE);
	  
	  if (count == -1)
	    {
	      /*
		Log BEFORE releasing 'help': the message prints the
		string that failed to be parsed.
	      */
	      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
		    _("%s@%d: failed to fill monomer array with string: '%s'\n"),
		     __FILE__, __LINE__, help);
	      
	      xmlFree (help);
	      
	      return -1;
	    }
	  
	  /*
	    The string was allocated by libxml2, so it is released
	    with the libxml2 deallocator.
	  */
	  xmlFree (help);
	  help = NULL;
	  
	  len = len + count;
	}
      
      else if (0 == strcmp ((gchar *) xml_node->name, "monomer"))
	{
	  monomer = pxmchem_monomer_render_xml_node_monomer (xml_doc,
							     xml_node,
							     refGPA,
							     NULL);
	  if (monomer == NULL)
	    {
	      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
		    _("%s@%d: failed to render monomer node\n"),
		     __FILE__, __LINE__);
	      
	      return -1;
	    }

	  /*
	    The monomer could be created, so we add it to
	    the array of monomers in the fillGPA.
	  */
	  g_ptr_array_add (fillGPA, monomer);
	  
	  len = len + 1;
	}
      else
	{
	  /*
	    Any child that is neither blank, nor <codes>, nor <monomer>
	    is refused.
	  */
	  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
		_("%s@%d: xml document not well-formed.\n"),
		 __FILE__, __LINE__);
	      
	  return -1;
	}
      
      xml_node = xml_node->next;

      /*
	Skip the blank nodes that may sit between two sibling
	elements (see the explanation above).
      */
      while (TRUE == xmlIsBlankNode (xml_node))
	xml_node = xml_node->next;
    }

  return len;
}


PxmPolymer *
pxmchem_polymer_render_xml_file (gchar *file,
				 GPtrArray *mnm_refGPA,
				 gint codelen,
				 gpointer user_data)
{
  /*
    We get a file name which must be a valid xml file. Then we have
    to parse this file and construct a polymer object. The refGPA
    should be the monomerGPA containing the reference monomers from
    the polchemdef object of the correct type. Codelen is also a member
    of the polchemdef object.

    file:       name of the xml file, cannot be NULL.
    mnm_refGPA: array of reference monomers, cannot be NULL nor empty.
    codelen:    handed untouched to the renderer plugin.
    user_data:  handed untouched to the renderer plugin.

    The renderer is chosen according to the "version" attribute of
    the <polseqdata> root element and is stored in the
    'pxmchem_polseqdata_xml_node_render_plugin' variable.

    Returns the polymer produced by the renderer plugin, with its
    plminfo file member set to 'file', or NULL upon any failure (a
    critical message is logged, unless 'file' is an empty string).
  */
  xmlDocPtr xml_doc = NULL;
  xmlNodePtr xml_node = NULL;
  
  PxmPolymer *polymer = NULL;

  gchar *help = NULL;
  
  
  g_assert (mnm_refGPA != NULL && mnm_refGPA->len > 0);

  g_assert (file != NULL);
  
  if (strlen (file) <= 0)
    return NULL;
  
  if (FALSE == g_file_test (file, G_FILE_TEST_EXISTS))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	    _("%s@%d: file not found: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      return NULL;
    }
      
  /*
    The very first thing we could do is check that the file is an
    XML file: <?xml version="1.0"?> should be the first item in the
    file.
  */
  if (FALSE == libpolyxmass_globals_check_xml_file (file))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	    _("%s@%d: file is not valid xml format: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      return NULL;
    }
  
  /*
    Build an XML tree from the file.
  */
  xml_doc = xmlParseFile (file);

  if (xml_doc == NULL)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	    _("%s@%d: file is not valid xml format: '%s'\n"),
	     __FILE__, __LINE__, file);
      
      return NULL;
    }

  /*
    Check if the document is of the right kind.
  */
  xml_node = xmlDocGetRootElement (xml_doc);

  if (xml_node == NULL)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	    _("%s@%d: file is an empty xml file: '%s'\n"),
	     __FILE__, __LINE__, file);

      xmlFreeDoc (xml_doc);
      
      return NULL;
    }

  if (0 != strcmp ((gchar *) xml_node->name, "polseqdata"))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file is of wrong type, "
	     "root node is not  \"polseqdata\": '%s'\n"),
	     __FILE__, __LINE__, file);
      
      xmlFreeDoc (xml_doc);
      
      return NULL;
    }

  /*
    The "version" attribute (possibly absent, in which case 'help' is
    NULL) determines which renderer is going to do the work.
  */
  help = (gchar *) xmlGetProp (xml_node, (guchar *) "version");
  
  pxmchem_polseqdata_xml_node_render_plugin = 
    pxmchem_polseqdata_xml_node_choose_renderer (help);
  
  if (pxmchem_polseqdata_xml_node_render_plugin == NULL)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: failed to get polseqdata renderer "
	       "for XML file format version: '%s'\n"),
	     __FILE__, __LINE__,
	     (help != NULL ? help : "(none)"));
      
      if (NULL != help)
	xmlFree (help);
      
      /*
	The parsed document must be released on this error path too,
	otherwise the whole xml tree is leaked.
      */
      xmlFreeDoc (xml_doc);
      
      return NULL;
    }
  
  if (NULL != help)
    xmlFree (help);
  
  polymer = pxmchem_polseqdata_xml_node_render_plugin (xml_doc,
						       xml_node,
						       mnm_refGPA,
						       codelen,
						       user_data);
  
  /*
    Free the xml doc material, whatever the outcome of the rendering.
  */
  xmlFreeDoc (xml_doc);


  if (polymer == NULL)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: failed to render the polymer from file: '%s'\n"),
	     __FILE__, __LINE__, file);

      return NULL;
    }
  
  /*
    We can set the polymer->plminfo->file member to the file name
    that we have used to render this polymer.
  */
  pxmchem_polymer_plminfo_set_file (polymer->plminfo, file);
  
  return polymer;
}
  

gchar *
pxmchem_polymer_get_xml_file_version (gchar *file)
{
  /*
    Read the "version" attribute of the <polseqdata> root element of
    the xml file 'file'.

    Returns a newly allocated string: a copy of the attribute value,
    or "none" if the root element has no such attribute. Returns NULL
    if 'file' is empty, does not exist, is not xml, cannot be parsed,
    has no root element or has a root element that is not
    <polseqdata>. The returned string is allocated with g_strdup ().
  */
  xmlDocPtr doc = NULL;
  xmlNodePtr root = NULL;

  gchar *attr = NULL;
  gchar *result = NULL;

  g_assert (file != NULL);

  /* An empty file name is silently refused.
   */
  if (file [0] == '\0')
    return NULL;

  if (!g_file_test (file, G_FILE_TEST_EXISTS))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file not found: '%s'\n"),
	     __FILE__, __LINE__, file);

      return NULL;
    }

  /* The file should start with the <?xml version="1.0"?> item.
   */
  if (!libpolyxmass_globals_check_xml_file (file))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file is not valid xml format: '%s'\n"),
	     __FILE__, __LINE__, file);

      return NULL;
    }

  /* Parse the file into an xml tree.
   */
  doc = xmlParseFile (file);

  if (doc == NULL)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file is not valid xml format: '%s'\n"),
	     __FILE__, __LINE__, file);

      return NULL;
    }

  /* From here on, the document is released at one single place, at
     the bottom of the function; 'result' remains NULL in the error
     cases.
  */
  root = xmlDocGetRootElement (doc);

  if (root == NULL)
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file is an empty xml file: '%s'\n"),
	     __FILE__, __LINE__, file);
    }
  else if (0 != strcmp ((gchar *) root->name, "polseqdata"))
    {
      g_log (G_LOG_DOMAIN, G_LOG_LEVEL_CRITICAL,
	     _("%s@%d: file is of wrong type, "
	     "root node is not \"polseqdata\": '%s'\n"),
	     __FILE__, __LINE__, file);
    }
  else
    {
      attr = (gchar *) xmlGetProp (root, (guchar *) "version");

      if (attr != NULL)
	{
	  result = g_strdup (attr);
	  xmlFree (attr);
	}
      else
	result = g_strdup ("none");
    }

  xmlFreeDoc (doc);

  return result;
}


/***** TEXT-format *****/
GString *
pxmchem_polymer_format_txt_string_polseqinfo (PxmPlminfo *plminfo,
					      PxmReportOpt *reportopt,
					      GString *gs)
{
  /*
    Append to 'gs' a text description of the polymer sequence
    information held in 'plminfo'.

    plminfo:   cannot be NULL; its 'type' member must be a non-empty
               string. If its 'name' or 'code' member is NULL or
               empty, that member is REPLACED with a newly allocated
               placeholder string ("Name not set"/"Code not set").
    reportopt: cannot be NULL; its 'polymer_opt' flags tell if the
               author and/or date lines are to be output.
    gs:        cannot be NULL; the string being elongated.

    Returns 'gs'.
  */
  const gchar *user = NULL;
  
  time_t the_time;
  struct tm *date_elements = NULL;
  gchar date_element [80] = "";
    
  g_assert (plminfo != NULL);
  g_assert (reportopt != NULL);
  g_assert (gs != NULL);
  
  /*
    We are willing to create a string that contains the <polseqinfo>
    data looking like this:


    polymer sequence data:
    ----------------------

    type: protein
    name: hamster P-glycoprotein
    code: P09765
    author: rusconi
    date: year month day
  */
  
  
  g_string_append_printf (gs, _("Polymer Sequence Data:\n"
			  "----------------------\n\n"));
  
  /* 
     The type of the polymer sequence is compulsory.
  */
  g_assert (plminfo->type != NULL && strlen (plminfo->type) > 0);
  g_string_append_printf (gs, "Polymer Type: '%s'\n", plminfo->type);
  
  /*
    The name of the polymer is not formally required, so we have to
    check this carefully. The previous value may be an allocated empty
    string: free it before it gets overwritten (g_free () accepts
    NULL).
  */
  if (plminfo->name == NULL || strlen (plminfo->name) <= 0)
    {
      g_free (plminfo->name);
      plminfo->name = g_strdup (_("Name not set"));
    }

  g_string_append_printf (gs, _("Name: '%s'\n"), plminfo->name);
  
  /*
    The code of the polymer is not formally required, so we have to
    check this carefully. Same remark as above about the freeing.
  */
  if (plminfo->code == NULL || strlen (plminfo->code) <= 0)
    {
      g_free (plminfo->code);
      plminfo->code = g_strdup (_("Code not set"));
    }

  g_string_append_printf (gs, _("Code: '%s'\n\n"), plminfo->code);
  
  if ((reportopt->polymer_opt & PXM_PLM_REPORTOPT_AUTHOR) == 
      PXM_PLM_REPORTOPT_AUTHOR)
    {
      /*
	The author that is output is the current user name, not the
	plminfo->author member. Function g_get_user_name () does not
	allocate the string, so there is nothing to free.
      */
      user = g_get_user_name ();
      g_assert (user != NULL);
      g_string_append_printf (gs, _("Author: '%s'\n\n"), user);
    }
  
  if ((reportopt->polymer_opt & PXM_PLM_REPORTOPT_DATE) == 
      PXM_PLM_REPORTOPT_DATE)
    {
      /*
	Special case of the date: the date that is output is the
	current UTC date, not the date members of plminfo.
      */
      time (&the_time);
      date_elements = gmtime (&the_time);
      
      /* gmtime () may return NULL, and strftime () must not be
	 handed a NULL pointer.
      */
      g_assert (date_elements != NULL);

      g_string_append_printf (gs, _("Date: "));
  
      /* Year
       */
      strftime (date_element, sizeof (date_element), "%Y", date_elements);
      g_string_append_printf (gs, "%s ", date_element);

      /* Month
       */
      strftime (date_element, sizeof (date_element), "%m", date_elements);
      g_string_append_printf (gs, "%s ", date_element);

      /* Day
       */
      strftime (date_element, sizeof (date_element), "%d", date_elements);
      g_string_append_printf (gs, "%s\n\n", date_element);
    }
    
  return gs;
}


GString *
pxmchem_polymer_format_txt_string_polseq_with_mnm_GPA (GPtrArray *GPA,
						       PxmReportOpt *reportopt,
						       GString *gs)
{
  /*
    Append to 'gs' a text description of the sequence made of the
    monomers stored in 'GPA', looking like this:
   
    polymer sequence:
    -----------------

    MEFEEDWYGEEDWYGTEEDWYGTEEDWYGTEEDWYGTEEDWYGTEEDWYGT

    monomer Y at position 123 has prop "MODIF": Acetylation
    monomer K at position 136 has prop "COMMENT": This one is methylated

    The sequence is broken with a newline each time that 70 or more
    characters have been written on the current line. The monomer
    properties section is output only if the PXM_MNM_REPORTOPT_PROP
    flag is set in reportopt->monomer_opt. The position that is
    printed is the index of the monomer in 'GPA'.

    Returns 'gs'.
  */
  gint pos = 0;
  gint line_width = 0;

  gchar *prop_txt = NULL;

  PxmMonomer *mnm = NULL;

  GString *sequence = NULL;
  GString *properties = NULL;


  g_assert (GPA != NULL && reportopt != NULL);
  g_assert (gs != NULL);

  /* Two temporary strings: one for the sequence proper, the other
     for the description of the monomers' prop objects. Both get
     appended to 'gs' once all the monomers have been dealt with.
  */
  sequence = g_string_new ("");
  properties = g_string_new ("");

  for (pos = 0; (guint) pos < GPA->len; pos++)
    {
      /* Start a new line if the current one is full.
       */
      if (line_width >= 70)
	{
	  sequence = g_string_append (sequence, "\n");
	  line_width = 0;
	}

      mnm = g_ptr_array_index (GPA, pos);
      g_assert (mnm != NULL);

      /* The code of the monomer goes into the sequence string.
       */
      g_string_append_printf (sequence, "%s", mnm->code);

      line_width += strlen (mnm->code);

      /* The prop objects are described only if that is asked for,
	 and only if the monomer has at least one of these.
      */
      if ((reportopt->monomer_opt & PXM_MNM_REPORTOPT_PROP) == 
	  PXM_MNM_REPORTOPT_PROP)
	{
	  if (mnm->propGPA->len > 0)
	    {
	      prop_txt = 
		pxmchem_monomer_format_txt_string_monomer (mnm, reportopt);

	      g_assert (prop_txt != NULL);

	      g_string_append_printf (properties, 
				      _("Monomer: '%s' at position: '%d', "
				      "has\n%s\n\n"), 
				      mnm->name,
				      pos,
				      prop_txt);

	      g_free (prop_txt);
	    }
	}
    }

  /* All the monomers were iterated, output the sequence section...
   */
  g_string_append_printf (gs,
			  _("Polymer Sequence:\n"
			  "-----------------\n\n"
			  "%s\n\n"),
			  sequence->str);

  /* ... and the properties section, if asked for.
   */
  if ((reportopt->monomer_opt & PXM_MNM_REPORTOPT_PROP) == 
      PXM_MNM_REPORTOPT_PROP)
    {
      g_string_append_printf (gs,
			      _("Monomer Properties:\n"
			      "-------------------\n\n"
			      "%s\n\n"),
			      properties->str);
    }

  g_string_free (sequence, TRUE);
  g_string_free (properties, TRUE);

  return gs;
}

GString *
pxmchem_polymer_format_txt_string_polymer_prop (PxmPolymer *polymer,
						PxmReportOpt *reportopt,
						GString *gs)
{
  /*
    Append to 'gs' the text description of each prop object found in
    polymer->propGPA, followed by a separator line.

    For each prop, the txt formatter plugin is looked up by the name
    of the prop and stored in the
    'libpolyxmass_prop_formatter_txt_prop_plugin' variable. A prop
    for which no formatter is registered only elicits a log message.

    Returns 'gs'.
  */
  gint idx = 0;

  gchar *formatted = NULL;

  PxmProp *cur_prop = NULL;


  g_assert (polymer != NULL);
  g_assert (reportopt != NULL);
  g_assert (gs != NULL);


  for (idx = 0; (guint) idx < polymer->propGPA->len; idx++)
    {
      cur_prop = g_ptr_array_index (polymer->propGPA, idx);
      g_assert (cur_prop != NULL && cur_prop->name != NULL);

      /* Which function is responsible for formatting the txt string
	 corresponding to the current PxmProp instance?
      */
      libpolyxmass_prop_formatter_txt_prop_plugin =
	libpolyxmass_prop_choose_format_txt_prop_plugin (cur_prop->name);

      if (libpolyxmass_prop_formatter_txt_prop_plugin == NULL)
	{
	  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_MESSAGE,
		 _("%s@%d: plugin prop: '%s' has no registered txt formatter\n"),
		 __FILE__, __LINE__, cur_prop->name);

	  continue;
	}

      formatted = 
	libpolyxmass_prop_formatter_txt_prop_plugin (cur_prop, NULL);

      g_assert (formatted != NULL);

      /* A formatter may return the "N/A" ("non applicable") pattern
	 to tell that nothing is to be appended for the prop it was
	 handed.
      */
      if (0 != strcmp ("N/A", formatted))
	g_string_append (gs, formatted);

      g_free (formatted);
    }

  g_string_append_printf (gs, 
			  _("--------------------------------------------\n\n"));

  return gs;
}




/*
  FREE'ING FUNCTIONS
*/
gboolean 
pxmchem_polymer_free (PxmPolymer *polymer)
{
  /*
    Free 'polymer', which cannot be NULL, along with all the members
    it holds. Always returns TRUE.
  */
  gint freed_monomers = 0;

  g_assert (polymer != NULL);

  /* The members first (each one may be NULL)...
   */
  if (NULL != polymer->propGPA)
    libpolyxmass_prop_GPA_free (polymer->propGPA);

  /* g_free () silently accepts a NULL pointer.
   */
  g_free (polymer->version);

  if (NULL != polymer->plminfo)
    pxmchem_plminfo_free (polymer->plminfo);

  if (NULL != polymer->masspair_seq)
    libpolyxmass_masspair_free (polymer->masspair_seq);

  if (NULL != polymer->masspair_sel)
    libpolyxmass_masspair_free (polymer->masspair_sel);

  if (NULL != polymer->monomerGPA)
    {
      freed_monomers = pxmchem_monomer_GPA_free (polymer->monomerGPA);

      /* A return value of 0 is only worth a message.
       */
      if (freed_monomers == 0)
	{
	  g_log (G_LOG_DOMAIN, G_LOG_LEVEL_MESSAGE,
		 _("%s@%d: the polymer's monomers' array was empty.\n"),
		 __FILE__, __LINE__);
	}
    }

  /* ... and finally the polymer itself.
   */
  g_free (polymer);

  return TRUE;
}


gboolean
pxmchem_plminfo_free (PxmPlminfo *plminfo)
{
  /*
    Free 'plminfo', which cannot be NULL, along with all its string
    members. Always returns TRUE.
  */
  g_assert (plminfo != NULL);

  /* Any of the members may be NULL: g_free () does nothing when
     handed a NULL pointer, so no check is required.
  */
  g_free (plminfo->type);
  g_free (plminfo->name);
  g_free (plminfo->code);
  g_free (plminfo->author);

  g_free (plminfo->date_year);
  g_free (plminfo->date_month);
  g_free (plminfo->date_day);

  g_free (plminfo->file);

  g_free (plminfo);

  return TRUE;
}



/*
  GPtrArray-RELATED FUNCTIONS
*/
gint
pxmchem_polymer_GPA_free (GPtrArray *GPA)
{
  /*
    Free all the polymer instances stored in 'GPA', which cannot be
    NULL, and then free the array itself. No item of the array may be
    NULL.

    Returns the number of polymer instances that were freed.
  */
  guint iter = 0;
  gint count = 0;
  
  PxmPolymer *polymer = NULL;
  

  g_assert (GPA != NULL);
  
  /*
    Walk the array once, from first to last item, instead of removing
    the item at index 0 again and again: each such removal moves all
    the remaining pointers one slot down, which makes the freeing
    quadratic in the number of polymers.
  */
  for (iter = 0; iter < GPA->len; iter++)
    {
      polymer = g_ptr_array_index (GPA, iter);
      g_assert (polymer != NULL);
      pxmchem_polymer_free (polymer);
      count++;
    }
  
  /*
    The items are now dangling pointers, but the array is freed right
    away, along with its segment of pointers.
  */
  g_ptr_array_free (GPA, TRUE);

  return count;
}
  














