Skip to content

Bugfix/#2045 Export of unresolved monomer to everything except KET and IDT should cause specific error message (but it doesn't) #2806

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 164 additions & 0 deletions api/c/tests/unit/tests/formats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,3 +316,167 @@ TEST_F(IndigoApiFormatsTest, ket_to_idt)
// printf("res=%s", res);
ASSERT_STREQ("ARAS", res);
}

TEST_F(IndigoApiFormatsTest, ket_to_mol_v3000_unknown_monomers)
{
try
{
int molecule = -1;
molecule = indigoLoadKetDocumentFromFile(dataPath("molecules/basic/unknown_monomers.ket").c_str());
indigoSetOption("molfile-saving-mode", "3000");

indigoMolfile(molecule);
}
catch (const std::exception& e)
{
ASSERT_STREQ("molfile saver: i2AmPr cannot be written in MDL Molfile format.", e.what());
}
}

TEST_F(IndigoApiFormatsTest, ket_to_mol_v2000_unknown_monomers)
{
try
{
int molecule = -1;
molecule = indigoLoadKetDocumentFromFile(dataPath("molecules/basic/unknown_monomers.ket").c_str());
indigoSetOption("molfile-saving-mode", "2000");

indigoMolfile(molecule);
}
catch (const std::exception& e)
{
ASSERT_STREQ("molfile saver: i2AmPr cannot be written in MDL Molfile format.", e.what());
}
}

TEST_F(IndigoApiFormatsTest, ket_to_sdf_v3000_unknown_monomers)
{
try
{
int molecule = -1;
molecule = indigoLoadKetDocumentFromFile(dataPath("molecules/basic/unknown_monomers.ket").c_str());
indigoSetOption("molfile-saving-mode", "3000");

auto buffer = indigoWriteBuffer();
auto comp_it = indigoIterateComponents(molecule);

while (indigoHasNext(comp_it))
{
const auto frag = indigoNext(comp_it);
const auto mol = indigoClone(frag);
indigoSdfAppend(buffer, mol);
}
}
catch (const std::exception& e)
{
ASSERT_STREQ("molfile saver: i2AmPr cannot be written in MDL Molfile format.", e.what());
}
}

TEST_F(IndigoApiFormatsTest, ket_to_sdf_v2000_unknown_monomers)
{
try
{
int molecule = -1;
molecule = indigoLoadKetDocumentFromFile(dataPath("molecules/basic/unknown_monomers.ket").c_str());
indigoSetOption("molfile-saving-mode", "2000");

auto buffer = indigoWriteBuffer();
auto comp_it = indigoIterateComponents(molecule);

while (indigoHasNext(comp_it))
{
const auto frag = indigoNext(comp_it);
const auto mol = indigoClone(frag);
indigoSdfAppend(buffer, mol);
}
}
catch (const std::exception& e)
{
ASSERT_STREQ("molfile saver: i2AmPr cannot be written in MDL Molfile format.", e.what());
}
}

TEST_F(IndigoApiFormatsTest, ket_to_rdf_v3000_unknown_reaction)
{
try
{
int reaction = -1;
reaction = indigoLoadReactionFromFile(dataPath("molecules/basic/unknown_reaction.ket").c_str());
indigoSetOption("molfile-saving-mode", "2000");

auto buffer = indigoWriteBuffer();
auto reac_it = indigoIterateReactions(reaction);
indigoRdfHeader(buffer);

while (indigoHasNext(reac_it))
{
const auto reac_obj = indigoNext(reac_it);
const auto reac = indigoClone(reac_obj);
indigoRdfAppend(buffer, reac);
}
}
catch (const std::exception& e)
{
ASSERT_STREQ("molfile saver: i2AmPr cannot be written in MDL Molfile format.", e.what());
}
}

TEST_F(IndigoApiFormatsTest, ket_to_rdf_v2000_unknown_reaction)
{
try
{
int reaction = -1;
reaction = indigoLoadReactionFromFile(dataPath("molecules/basic/unknown_reaction.ket").c_str());
indigoSetOption("molfile-saving-mode", "2000");

auto buffer = indigoWriteBuffer();
auto reac_it = indigoIterateReactions(reaction);
indigoRdfHeader(buffer);

while (indigoHasNext(reac_it))
{
const auto reac_obj = indigoNext(reac_it);
const auto reac = indigoClone(reac_obj);
indigoRdfAppend(buffer, reac);
}
}
catch (const std::exception& e)
{
ASSERT_STREQ("molfile saver: i2AmPr cannot be written in MDL Molfile format.", e.what());
}
}

TEST_F(IndigoApiFormatsTest, ket_to_canonical_smarts_unknown_monomers)
{
try
{
int molecule = -1;
molecule = indigoLoadMoleculeFromFile(dataPath("molecules/basic/unknown_monomers.ket").c_str());

auto buffer = indigoWriteBuffer();

indigoCanonicalSmarts(molecule);
}
catch (const std::exception& e)
{
ASSERT_STREQ("SMILES saver: i2AmPr cannot be written in SMILES/SMARTS format.", e.what());
}
}

TEST_F(IndigoApiFormatsTest, ket_to_smarts_unknown_monomers)
{
try
{
int molecule = -1;
molecule = indigoLoadMoleculeFromFile(dataPath("molecules/basic/unknown_monomers.ket").c_str());

auto buffer = indigoWriteBuffer();

indigoSmarts(molecule);
}
catch (const std::exception& e)
{
ASSERT_STREQ("core: <KetDocument> can not be converted to SMARTS", e.what());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This message looks wrong.
Looks like no exception thrown.
Could you please rewrite these tests like

catch (const std::exception& e)
{
    ASSERT_STREQ(msg, e.what());
    return;
}
ASSERT(false);

just to be sure that exception thrown

Copy link
Collaborator Author

@David-Kuts David-Kuts Mar 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exception is thrown in void IndigoSmilesSaver::generateSmarts(IndigoObject& obj, Array<char>& out_buffer) in indigo_severs.cpp.

            ...
            loader_tmp.loadQueryReaction(qreaction);
            saver.saveQueryReaction(qreaction);
        }
    }
    else
        throw IndigoError("%s can not be converted to SMARTS", obj.debugInfo());
    out_buffer.push(0);

In this case, do I still need to write code in the way you suggest?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Beacuse after indigoLoadMoleculeFromFile molecule not a KetDocument - but error message contains this type wich is strange.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've checked. Yes, it is not throwing an exception.

In void IndigoSmilesSaver::generateSmarts(IndigoObject& obj, Array<char>& out_buffer) in

...
{
      Array<char> mol_out_buffer;
      ArrayOutput mol_output(mol_out_buffer);
      MolfileSaver saver_tmp(mol_output);
      saver_tmp.saveMolecule(mol.asMolecule());
      mol_out_buffer.push(0);
      BufferScanner sc(mol_out_buffer);
      MolfileLoader loader_tmp(sc);
      QueryMolecule qmol;
      loader_tmp.loadQueryMolecule(qmol);
      saver.saveQueryMolecule(qmol);
}
...
  1. The first one called is void MolfileSaver::saveMolecule(Molecule& mol) ==> void MolfileSaver::saveMolecule(Molecule& mol) ==> void MolfileSaver::_validate(BaseMolecule& bmol)

  2. The second one called is void SmilesSaver::saveQueryMolecule(QueryMolecule& mol) ==> void SmilesSaver::_saveMolecule() ==> void SmilesSaver::_validate(BaseMolecule& bmol)

At the end of those calls bool BaseMolecule::getUnresolvedTemplatesList(BaseMolecule& bmol, std::string& unresolved) is called

The first and the second ones have different behavior:

  1. unresolved.size() returns 0
  2. does not even enter the if-body:
if (!bmol.isQueryMolecule())
    {
        if (bmol.tgroups.getTGroupCount())
        {
            for (auto tgidx = bmol.tgroups.begin(); tgidx != bmol.tgroups.end(); tgidx = bmol.tgroups.next(tgidx))
            {
                auto& tg = bmol.tgroups.getTGroup(tgidx);
                if (tg.unresolved && tg.tgroup_alias.size())
                {
                    if (unresolved.size())
                        unresolved += ',';
                    unresolved += tg.tgroup_alias.ptr();
                }
            }
        }
    }

Could you tell, please, which behavior is expected and which one is not?

Maybe you have any other suggestions to fix the bug due to my lack of understanding of how the things are working.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove if (!bmol.isQueryMolecule()) - looks like this is something from first versions without query molecules support.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getUnresolvedTemplatesList should return correct list of unresolved templates.
If it return wrong list - thois is a bug.

Copy link
Collaborator

@AliaksandrDziarkach AliaksandrDziarkach Mar 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to getUnresolvedTemplatesList code it could return empty list if uresolved has no alias.
Pleas add hasUnresolvedTemplates wich will return true if at least one unresolved template exists.
And use it instead of getUnresolvedTemplatesList

}
}
2 changes: 2 additions & 0 deletions core/indigo-core/molecule/src/molfile_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,8 @@ void MolfileSaver::_writeMultiString(Output& output, const char* string, int len

void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query)
{
_validate(mol);

_handleCIP(mol);
if (mol.tgroups.getTGroupCount())
_handleMonomers(mol);
Expand Down
128 changes: 128 additions & 0 deletions data/molecules/basic/unknown_monomers.ket
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
{
"root": {
"nodes": [
{
"$ref": "monomer0"
}
],
"connections": [],
"templates": [
{
"$ref": "monomerTemplate-unknown_monomer_with_idt_alias_i2AmPr"
}
]
},
"monomer0": {
"type": "monomer",
"id": "0",
"position": {
"x": 1.25,
"y": -1.25
},
"alias": "i2AmPr",
"templateId": "unknown_monomer_with_idt_alias_i2AmPr",
"seqid": 0
},
"monomerTemplate-unknown_monomer_with_idt_alias_i2AmPr": {
"type": "monomerTemplate",
"atoms": [
{
"label": "C",
"location": [
0,
0,
0
]
},
{
"label": "C",
"location": [
1,
1,
1
]
},
{
"label": "C",
"location": [
2,
2,
2
]
},
{
"label": "C",
"location": [
3,
3,
3
]
}
],
"bonds": [
{
"type": 1,
"atoms": [
0,
1
]
},
{
"type": 1,
"atoms": [
1,
2
]
},
{
"type": 1,
"atoms": [
2,
3
]
},
{
"type": 1,
"atoms": [
0,
3
]
}
],
"class": "CHEM",
"id": "unknown_monomer_with_idt_alias_i2AmPr",
"fullName": "i2AmPr",
"alias": "i2AmPr",
"attachmentPoints": [
{
"attachmentAtom": 0,
"leavingGroup": {
"atoms": []
}
},
{
"attachmentAtom": 1,
"leavingGroup": {
"atoms": []
}
},
{
"attachmentAtom": 2,
"leavingGroup": {
"atoms": []
}
},
{
"attachmentAtom": 3,
"leavingGroup": {
"atoms": []
}
}
],
"idtAliases": {
"base": "i2AmPr"
},
"unresolved": true,
"naturalAnalogShort": ""
}
}
Loading
Loading