SlideShare uma empresa Scribd logo
1 de 23
Baixar para ler offline
analytics
about me
who am i ...
Davy Suvee
@DSUVEE
➡ big data architect @ datablend - continuum
• provide big data and nosql consultancy
• 5 years of hands-on expertise
from data to insights
data analytics in mongodb
chemical similarity use-case
native api
aggregation framework
map/reduce
chemical similarity (1)
★ 31 million compounds available
➡ pubchem
➡ Question:
★ find compounds similar to a
particular other compound
chemicalsimilarity(2)
0[N]1[C	
  O]2[C	
  C	
  C]
0[N]1[C	
  O]2[C	
  C	
  C]3[C	
  C	
  C	
  C	
  C]
0[C]1[C	
  C	
  C]2[C	
  C	
  N	
  O]3[C	
  C	
  C	
  C	
  O	
  O]
0[C]1[C	
  C]2[C	
  C	
  C	
  C	
  O]3[C	
  C	
  N	
  O]
0[O]1[C]2[C	
  O]3[C	
  C	
  C]
0[C]1[C	
  O	
  O]2[C	
  C	
  C	
  O]
0[C]1[C	
  C]2[C	
  C]
0[C]1[C]2[C]3[C	
  O]
0[C]1[C	
  C	
  N]2[C	
  C	
  C	
  C	
  O]3[C	
  C	
  C	
  O]
...
chemicalsimilarity(3)
0[N]1[C	
  O]2[C	
  C	
  C]
0[N]1[C	
  O]2[C	
  C	
  C]3[C	
  C	
  C	
  C	
  C]
0[C]1[C	
  C	
  C]2[C	
  C	
  N	
  O]3[C	
  C	
  C	
  C	
  O	
  O]
0[C]1[C	
  C]2[C	
  C	
  C	
  C	
  O]3[C	
  C	
  N	
  O]
0[O]1[C]2[C	
  O]3[C	
  C	
  C]
0[C]1[C	
  O	
  O]2[C	
  C	
  C	
  O]
0[C]1[C	
  C]2[C	
  C]
0[C]1[C]2[C]3[C	
  O]
0[C]1[C	
  C	
  N]2[C	
  C	
  C	
  C	
  O]3[C	
  C	
  C	
  O]
...
0[N]1[C	
  O]2[C	
  C	
  C]3[C	
  C	
  C	
  C	
  C	
  C]
0[C]1[C	
  C	
  C]2[C	
  C	
  N	
  O]3[C	
  C	
  C	
  C	
  O	
  O]
0[C]1[C	
  C]2[C	
  C	
  C	
  C	
  O]3[C	
  C	
  N	
  O]
0[O]1[C]2[C	
  O]3[C	
  C	
  C	
  C]
0[C]1[C	
  O	
  O]2[C	
  C	
  C	
  O]
0[C]1[C	
  C]2[C	
  C]
0[N]1[C	
  O]2[C	
  C	
  C]
0[C]1[C]2[C]3[C	
  O]
0[C]1[C	
  C	
  N]2[C	
  C	
  C	
  C	
  O]3[C	
  C	
  C	
  O]
...
equality via tanimoto
but 31 million calculations?
mongodbdatamodel(1)
{	
  
	
  	
  	
  	
  "compound_cid"	
  :	
  "46200001"	
  ,	
  
	
  	
  	
  	
  "smiles"	
  :	
  "CCC1C(C(C(C(=NOCC=CCN2CCCCC2)C(CC(C(C(C(C(C(=O)O1)C)OC3C"	
  ,
	
  	
  	
  	
  "fingerprint_count"	
  :	
  120	
  ,	
  
	
  	
  	
  	
  "fingerprints"	
  :	
  [	
  
	
  	
  	
  	
  	
  	
  	
  	
  "0[N]1[C	
  O]2[C	
  C	
  C]"	
  ,
	
  	
  	
  	
  	
  	
  	
  	
  "0[N]1[C	
  O]2[C	
  C	
  C]3[C	
  C	
  C	
  C	
  C]"	
  ,
	
  	
  	
  	
  	
  	
  	
  	
  "0[C]1[C	
  C	
  C]2[C	
  C	
  N	
  O]3[C	
  C	
  C	
  C	
  O	
  O]"	
  ,
	
  	
  	
  	
  	
  	
  	
  	
  "0[C]1[C	
  C]2[C	
  C	
  C	
  C	
  O]3[C	
  C	
  N	
  O]"	
  ,
	
  	
  	
  	
  	
  	
  	
  	
  "0[O]1[C]2[C	
  O]3[C	
  C	
  C]"	
  ,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "0[C]1[C	
  O	
  O]2[C	
  C	
  C	
  O]"	
  ,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "0[C]1[C	
  C]2[C	
  C]"	
  ,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "0[C]1[C]2[C]3[C	
  O]"	
  ,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "0[C]1[C	
  C	
  N]2[C	
  C	
  C	
  C	
  O]3[C	
  C	
  C	
  O]"	
  ,
	
  	
  	
  	
  	
  	
  	
  	
  ...	
  ]	
  ,	
  
}
compound
collection
mongodbdatamodel(2) fingerprint
collection
{	
  
	
  	
  	
  	
  "fingerprint"	
  :	
  "0[N]1[C	
  O]2[C	
  C	
  C]",
	
  	
  	
  	
  "count"	
  :	
  472
}
{	
  
	
  	
  	
  	
  "fingerprint"	
  :	
  "0[N]1[C	
  O]2[C	
  C	
  C]3[C	
  C	
  C	
  C	
  C]",
	
  	
  	
  	
  "count"	
  :	
  41
}
{
	
  	
  	
  	
  "fingerprint"	
  :	
  "0[O]1[C]2[C	
  O]3[C	
  C	
  C]",
	
  	
  	
  	
  "count"	
  :	
  1343
}
querying pattern (1)
★ from 31 million -> potential match
➡ narrow down the search space
➡ imagine 80% search for a compound with 40 features
➡ 32 (minimum fingerprint count: 40 × 0.8)
➡ 50 (maximum fingerprint count: 40 / 0.8)
querying pattern (2)
★ from 31 million -> potential match
➡ narrow down the search space
➡ imagine 80% search for a compound with 40 features
(9 fingerprints)
findthefingerprints(1)
//	
  Retrieve	
  the	
  particular	
  compound
DBObject	
  object	
  =	
  
compoundsCollection.findOne(QueryBuilder.start("compound_cid").is(compound).get());
//	
  Retrieve	
  the	
  relevant	
  properties
String	
  pubchemcid	
  =	
  (String)object.get(COMPOUNDCID_PROPERTY);
List<Integer>	
  fingerprintstofind	
  =	
  
Arrays.asList(((BasicDBList)object.get(FINGERPRINTS_PROPERTY)).toArray(new	
  
Integer[]{}));
//	
  Sort	
  the	
  fingerprints	
  on	
  total	
  number	
  of	
  occurrences
fingerprintstofind	
  =	
  findSortedFingerprints(fingerprintstofind);
findthefingerprints(2)
List<Integer>	
  sortedFingerprintsToFind	
  =	
  new	
  ArrayList<Integer>();
	
  	
  //	
  Find	
  all	
  fingerprint	
  count	
  documents
	
  	
  DBObject	
  fingerprintcountquery	
  =	
  
	
  	
  	
  	
  QueryBuilder.start(FINGERPRINT_PROPERTY).in(fingerprintsToFind.toArray()).get();
	
  	
  
	
  	
  //	
  Only	
  retrieve	
  the	
  fingerprint	
  string	
  itself
	
  	
  DBObject	
  fingerprintcountselection	
  =	
  
	
  	
  	
  	
  QueryBuilder.start(FINGERPRINT_PROPERTY).is(1).get();
	
  	
  	
  	
  	
  	
  	
  	
  
	
  	
  //	
  Sort	
  the	
  result	
  on	
  count
	
  	
  DBObject	
  fingerprintcountsort	
  =	
  QueryBuilder.start(COUNT_PROPERTY).is(1).get();
	
  	
  //	
  Execute	
  the	
  query	
  on	
  the	
  fingerprint	
  counts	
  collection
	
  	
  DBCursor	
  fingerprintcounts	
  =	
  
	
  	
  	
  	
  fingerprintCountsCollection.find(fingerprintcountquery,	
  fingerprintcountselection).
	
  	
  	
  	
  sort(fingerprintcountsort);
nativequery(1)
//	
  Find	
  the	
  matching	
  compounds
DBObject	
  compoundquery	
  =	
  
	
  	
  QueryBuilder.
	
  	
  	
  	
  start(FINGERPRINTS_PROPERTY).
	
  	
  	
  	
  in(fingerprintsToConsider).
	
  	
  	
  	
  and(FINGERPRINTCOUNT_PROPERTY).lessThanEquals(maxnumberofcompoundfingerprints).
	
  	
  	
  	
  and(FINGERPRINTCOUNT_PROPERTY).greaterThanEquals(minnumberofcompoundfingerprints).
	
  	
  	
  	
  get();
nativequery(2)
//	
  Execute	
  the	
  query
DBCursor	
  compounds	
  =	
  compoundsCollection.find(compoundquery);
	
  	
  
//	
  Let's	
  process	
  the	
  found	
  compounds	
  locally
while(compounds.hasNext())	
  {
	
  	
  DBObject	
  compound	
  =	
  compounds.next();
	
  	
  BasicDBList	
  fingerprints	
  =	
  ((BasicDBList)	
  	
  
	
  	
  	
  	
  compound.get(FINGERPRINTS_PROPERTY));
	
  	
  
	
  	
  //	
  Calculate	
  the	
  intersection	
  on	
  the	
  total	
  list	
  of	
  fingerprints
	
  	
  fingerprints.retainAll(fingerprintsToFind);
	
  	
  if	
  (fingerprints.size()	
  >=	
  minnumberofcompoundfingerprints)	
  {
	
  	
  	
  	
  //	
  Calculate	
  the	
  tanimoto	
  coefficient	
  ...
	
  	
  }
}	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  
map/reducequery(1)
map/reducequery(2)
	
  //	
  Find	
  all	
  compounds
DBObject	
  compoundquery	
  =	
  ...	
  
//	
  The	
  map	
  function
String	
  map	
  =	
  "function()	
  {	
  	
  "	
  +
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "	
  	
  var	
  found	
  =	
  0;	
  "	
  +
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "	
  	
  var	
  fingerprintslength	
  =	
  this.fingerprints.length;	
  "	
  +
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "	
  	
  for	
  (i	
  =	
  0;	
  i	
  <	
  fingerprintslength;	
  i++)	
  {	
  "	
  +
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "	
  	
  	
  	
  if	
  (fingerprintstofind[this.fingerprints[i]]	
  ===	
  true)	
  {	
  found++;	
  }	
  "	
  +
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "	
  	
  }	
  "	
  +
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "	
  	
  if	
  (found	
  >=	
  minnumberofcompoundfingerprints)	
  {	
  "	
  +
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "	
  	
  	
  	
  emit	
  (this.compound_cid,	
  {found	
  :	
  found,	
  "	
  +	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  total	
  :	
  this.fingerprint_count}	
  );	
  }	
  "	
  +
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "}";
//	
  Execute	
  the	
  map	
  reduce	
  function
MapReduceCommand	
  mr	
  =	
  new	
  MapReduceCommand(compoundsCollection,	
  map,	
  "",	
  
	
  	
  MapReduceCommand.OutputType.INLINE,	
  compoundquery);	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  
aggregationframework(1)
aggregationframework(2)
{	
  "aggregate"	
  :	
  "compounds"	
  ,	
  
	
  	
  "pipeline"	
  :	
  [	
  
	
  	
  	
  	
  	
  {	
  "$match"	
  :	
  {	
  "fingerprint_count"	
  :	
  {	
  "$gte"	
  :	
  4	
  ,	
  "$lte"	
  :	
  1780}}}	
  ,	
  
	
  	
  	
  	
  	
  {	
  "$unwind"	
  :	
  "$fingerprints"}	
  ,	
  
	
  	
  	
  	
  	
  {	
  "$match"	
  :	
  {	
  "fingerprints"	
  :	
  {	
  "$in"	
  :	
  [	
  1960,	
  15111,	
  ...,94	
  ,	
  26]}}}	
  ,	
  	
  
	
  	
  	
  	
  	
  {	
  "$group"	
  :	
  {	
  "_id"	
  :	
  "$compound_cid"	
  ,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "fingerprintmatches"	
  :	
  {	
  "$sum"	
  :	
  1}	
  ,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "totalcount"	
  :	
  {	
  "$first"	
  :	
  "$fingerprint_count"}	
  }}}	
  ,	
  	
  
	
  	
  	
  	
  	
  {	
  "$project"	
  :	
  {	
  "_id"	
  :	
  1	
  ,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "tanimoto"	
  :	
  {	
  "$divide"	
  :	
  [	
  "$fingerprintmatches"	
  ,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  {	
  "$subtract"	
  :	
  [	
  {	
  "$add"	
  :	
  [	
  89	
  ,	
  "$totalcount"]}	
  ,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "$fingerprintmatches"]}]}}	
  ,	
  
	
  	
  	
  	
  	
  {	
  "$match"	
  :	
  {	
  "tanimoto"	
  :	
  {	
  "$gte"	
  :	
  0.05}}}]}
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  
benchmark results
★ native -> 202 ms
➡ 100K compounds, 0.8 tanimoto
★ map/reduce -> 214 ms
★ aggregation framework -> 609 ms
★ native -> 1909 ms
➡ 100K compounds, 0.05 tanimoto
★ map/reduce -> 20978 ms
★ aggregation framework -> 1613 ms
diy mongodb analytics ...
➡ http://datablend.be/?p=256
➡ the joy of algorithms and nosql: a mongodb example
➡ http://github.com/datablend/mongo-compound-comparison-revisited
Questions?
E-MAIL
info@datablend.be
Follow us
twitter.com/data_blend
www.datablend.be
www.datablend.be info@datablend.be 0499/05.00.89
datablend-continuum

Mais conteúdo relacionado

Mais procurados

The Ring programming language version 1.5.1 book - Part 44 of 180
The Ring programming language version 1.5.1 book - Part 44 of 180The Ring programming language version 1.5.1 book - Part 44 of 180
The Ring programming language version 1.5.1 book - Part 44 of 180Mahmoud Samir Fayed
 
MongoDBで作るソーシャルデータ新解析基盤
MongoDBで作るソーシャルデータ新解析基盤MongoDBで作るソーシャルデータ新解析基盤
MongoDBで作るソーシャルデータ新解析基盤Takahiro Inoue
 
MongoDB dla administratora
MongoDB dla administratora MongoDB dla administratora
MongoDB dla administratora 3camp
 
Pandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with codePandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with codeTim Hong
 
はじめてのMongoDB
はじめてのMongoDBはじめてのMongoDB
はじめてのMongoDBTakahiro Inoue
 
The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212Mahmoud Samir Fayed
 
Sokoban Game Development Using Java ( Updated using Screenshots & Class Diagr...
Sokoban Game Development Using Java ( Updated using Screenshots & Class Diagr...Sokoban Game Development Using Java ( Updated using Screenshots & Class Diagr...
Sokoban Game Development Using Java ( Updated using Screenshots & Class Diagr...British Council
 
The Ring programming language version 1.7 book - Part 63 of 196
The Ring programming language version 1.7 book - Part 63 of 196The Ring programming language version 1.7 book - Part 63 of 196
The Ring programming language version 1.7 book - Part 63 of 196Mahmoud Samir Fayed
 
Webinarserie: Einführung in MongoDB: “Back to Basics” - Teil 3 - Interaktion ...
Webinarserie: Einführung in MongoDB: “Back to Basics” - Teil 3 - Interaktion ...Webinarserie: Einführung in MongoDB: “Back to Basics” - Teil 3 - Interaktion ...
Webinarserie: Einführung in MongoDB: “Back to Basics” - Teil 3 - Interaktion ...MongoDB
 
Webinar: Building Your First App in Node.js
Webinar: Building Your First App in Node.jsWebinar: Building Your First App in Node.js
Webinar: Building Your First App in Node.jsMongoDB
 
Detection of errors and potential vulnerabilities in C and C++ code using the...
Detection of errors and potential vulnerabilities in C and C++ code using the...Detection of errors and potential vulnerabilities in C and C++ code using the...
Detection of errors and potential vulnerabilities in C and C++ code using the...Andrey Karpov
 
20110514 mongo dbチューニング
20110514 mongo dbチューニング20110514 mongo dbチューニング
20110514 mongo dbチューニングYuichi Matsuo
 
The Ring programming language version 1.5.2 book - Part 66 of 181
The Ring programming language version 1.5.2 book - Part 66 of 181The Ring programming language version 1.5.2 book - Part 66 of 181
The Ring programming language version 1.5.2 book - Part 66 of 181Mahmoud Samir Fayed
 
Improved Security Proof for the Camenisch- Lysyanskaya Signature-Based Synchr...
Improved Security Proof for the Camenisch- Lysyanskaya Signature-Based Synchr...Improved Security Proof for the Camenisch- Lysyanskaya Signature-Based Synchr...
Improved Security Proof for the Camenisch- Lysyanskaya Signature-Based Synchr...MASAYUKITEZUKA1
 
Mobile Game and Application with J2ME - Collision Detection
Mobile Gameand Application withJ2ME  - Collision DetectionMobile Gameand Application withJ2ME  - Collision Detection
Mobile Game and Application with J2ME - Collision DetectionJenchoke Tachagomain
 
Mobile Game and Application with J2ME
Mobile Gameand Application with J2MEMobile Gameand Application with J2ME
Mobile Game and Application with J2MEJenchoke Tachagomain
 
Exploring Canvas
Exploring CanvasExploring Canvas
Exploring CanvasKevin Hoyt
 

Mais procurados (20)

The Ring programming language version 1.5.1 book - Part 44 of 180
The Ring programming language version 1.5.1 book - Part 44 of 180The Ring programming language version 1.5.1 book - Part 44 of 180
The Ring programming language version 1.5.1 book - Part 44 of 180
 
MongoDBで作るソーシャルデータ新解析基盤
MongoDBで作るソーシャルデータ新解析基盤MongoDBで作るソーシャルデータ新解析基盤
MongoDBで作るソーシャルデータ新解析基盤
 
MongoDB dla administratora
MongoDB dla administratora MongoDB dla administratora
MongoDB dla administratora
 
Pandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with codePandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with code
 
はじめてのMongoDB
はじめてのMongoDBはじめてのMongoDB
はじめてのMongoDB
 
The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212The Ring programming language version 1.10 book - Part 56 of 212
The Ring programming language version 1.10 book - Part 56 of 212
 
Sokoban Game Development Using Java ( Updated using Screenshots & Class Diagr...
Sokoban Game Development Using Java ( Updated using Screenshots & Class Diagr...Sokoban Game Development Using Java ( Updated using Screenshots & Class Diagr...
Sokoban Game Development Using Java ( Updated using Screenshots & Class Diagr...
 
Asssignment2
Asssignment2 Asssignment2
Asssignment2
 
Ss
SsSs
Ss
 
The Ring programming language version 1.7 book - Part 63 of 196
The Ring programming language version 1.7 book - Part 63 of 196The Ring programming language version 1.7 book - Part 63 of 196
The Ring programming language version 1.7 book - Part 63 of 196
 
Webinarserie: Einführung in MongoDB: “Back to Basics” - Teil 3 - Interaktion ...
Webinarserie: Einführung in MongoDB: “Back to Basics” - Teil 3 - Interaktion ...Webinarserie: Einführung in MongoDB: “Back to Basics” - Teil 3 - Interaktion ...
Webinarserie: Einführung in MongoDB: “Back to Basics” - Teil 3 - Interaktion ...
 
Webinar: Building Your First App in Node.js
Webinar: Building Your First App in Node.jsWebinar: Building Your First App in Node.js
Webinar: Building Your First App in Node.js
 
Detection of errors and potential vulnerabilities in C and C++ code using the...
Detection of errors and potential vulnerabilities in C and C++ code using the...Detection of errors and potential vulnerabilities in C and C++ code using the...
Detection of errors and potential vulnerabilities in C and C++ code using the...
 
20110514 mongo dbチューニング
20110514 mongo dbチューニング20110514 mongo dbチューニング
20110514 mongo dbチューニング
 
Binomial heap
Binomial heapBinomial heap
Binomial heap
 
The Ring programming language version 1.5.2 book - Part 66 of 181
The Ring programming language version 1.5.2 book - Part 66 of 181The Ring programming language version 1.5.2 book - Part 66 of 181
The Ring programming language version 1.5.2 book - Part 66 of 181
 
Improved Security Proof for the Camenisch- Lysyanskaya Signature-Based Synchr...
Improved Security Proof for the Camenisch- Lysyanskaya Signature-Based Synchr...Improved Security Proof for the Camenisch- Lysyanskaya Signature-Based Synchr...
Improved Security Proof for the Camenisch- Lysyanskaya Signature-Based Synchr...
 
Mobile Game and Application with J2ME - Collision Detection
Mobile Gameand Application withJ2ME  - Collision DetectionMobile Gameand Application withJ2ME  - Collision Detection
Mobile Game and Application with J2ME - Collision Detection
 
Mobile Game and Application with J2ME
Mobile Gameand Application with J2MEMobile Gameand Application with J2ME
Mobile Game and Application with J2ME
 
Exploring Canvas
Exploring CanvasExploring Canvas
Exploring Canvas
 

Semelhante a MongoDB Analytics

All I know about rsc.io/c2go
All I know about rsc.io/c2goAll I know about rsc.io/c2go
All I know about rsc.io/c2goMoriyoshi Koizumi
 
How to leverage what's new in MongoDB 3.6
How to leverage what's new in MongoDB 3.6How to leverage what's new in MongoDB 3.6
How to leverage what's new in MongoDB 3.6Maxime Beugnet
 
Webinar: Applikationsentwicklung mit MongoDB : Teil 5: Reporting & Aggregation
Webinar: Applikationsentwicklung mit MongoDB: Teil 5: Reporting & AggregationWebinar: Applikationsentwicklung mit MongoDB: Teil 5: Reporting & Aggregation
Webinar: Applikationsentwicklung mit MongoDB : Teil 5: Reporting & AggregationMongoDB
 
MongoDB Europe 2016 - Debugging MongoDB Performance
MongoDB Europe 2016 - Debugging MongoDB PerformanceMongoDB Europe 2016 - Debugging MongoDB Performance
MongoDB Europe 2016 - Debugging MongoDB PerformanceMongoDB
 
A miało być tak... bez wycieków
A miało być tak... bez wyciekówA miało być tak... bez wycieków
A miało być tak... bez wyciekówKonrad Kokosa
 
Malli: inside data-driven schemas
Malli: inside data-driven schemasMalli: inside data-driven schemas
Malli: inside data-driven schemasMetosin Oy
 
Самые вкусные баги из игрового кода: как ошибаются наши коллеги-программисты ...
Самые вкусные баги из игрового кода: как ошибаются наши коллеги-программисты ...Самые вкусные баги из игрового кода: как ошибаются наши коллеги-программисты ...
Самые вкусные баги из игрового кода: как ошибаются наши коллеги-программисты ...DevGAMM Conference
 
Presto in Treasure Data (presented at db tech showcase Sapporo 2015)
Presto in Treasure Data (presented at db tech showcase Sapporo 2015)Presto in Treasure Data (presented at db tech showcase Sapporo 2015)
Presto in Treasure Data (presented at db tech showcase Sapporo 2015)Mitsunori Komatsu
 
Beyond php it's not (just) about the code
Beyond php   it's not (just) about the codeBeyond php   it's not (just) about the code
Beyond php it's not (just) about the codeWim Godden
 
MongoDB World 2019: Event Horizon: Meet Albert Einstein As You Move To The Cloud
MongoDB World 2019: Event Horizon: Meet Albert Einstein As You Move To The CloudMongoDB World 2019: Event Horizon: Meet Albert Einstein As You Move To The Cloud
MongoDB World 2019: Event Horizon: Meet Albert Einstein As You Move To The CloudMongoDB
 
Computer science project work on C++
Computer science project work on C++Computer science project work on C++
Computer science project work on C++NARESH KUMAR
 
Regression and Classification with R
Regression and Classification with RRegression and Classification with R
Regression and Classification with RYanchang Zhao
 
MongoDB Aggregation Framework in action !
MongoDB Aggregation Framework in action !MongoDB Aggregation Framework in action !
MongoDB Aggregation Framework in action !Sébastien Prunier
 
Best Bugs from Games: Fellow Programmers' Mistakes
Best Bugs from Games: Fellow Programmers' MistakesBest Bugs from Games: Fellow Programmers' Mistakes
Best Bugs from Games: Fellow Programmers' MistakesAndrey Karpov
 
TDC2018SP | Trilha .Net - Novidades do C# 7 e 8
TDC2018SP | Trilha .Net - Novidades do C# 7 e 8TDC2018SP | Trilha .Net - Novidades do C# 7 e 8
TDC2018SP | Trilha .Net - Novidades do C# 7 e 8tdc-globalcode
 
1403 app dev series - session 5 - analytics
1403   app dev series - session 5 - analytics1403   app dev series - session 5 - analytics
1403 app dev series - session 5 - analyticsMongoDB
 
Py conkr 20150829_docker-python
Py conkr 20150829_docker-pythonPy conkr 20150829_docker-python
Py conkr 20150829_docker-pythonEric Ahn
 

Semelhante a MongoDB Analytics (20)

All I know about rsc.io/c2go
All I know about rsc.io/c2goAll I know about rsc.io/c2go
All I know about rsc.io/c2go
 
MongoDB With Style
MongoDB With StyleMongoDB With Style
MongoDB With Style
 
How to leverage what's new in MongoDB 3.6
How to leverage what's new in MongoDB 3.6How to leverage what's new in MongoDB 3.6
How to leverage what's new in MongoDB 3.6
 
Mongo db dla administratora
Mongo db dla administratoraMongo db dla administratora
Mongo db dla administratora
 
Webinar: Applikationsentwicklung mit MongoDB : Teil 5: Reporting & Aggregation
Webinar: Applikationsentwicklung mit MongoDB: Teil 5: Reporting & AggregationWebinar: Applikationsentwicklung mit MongoDB: Teil 5: Reporting & Aggregation
Webinar: Applikationsentwicklung mit MongoDB : Teil 5: Reporting & Aggregation
 
MongoDB Europe 2016 - Debugging MongoDB Performance
MongoDB Europe 2016 - Debugging MongoDB PerformanceMongoDB Europe 2016 - Debugging MongoDB Performance
MongoDB Europe 2016 - Debugging MongoDB Performance
 
A miało być tak... bez wycieków
A miało być tak... bez wyciekówA miało być tak... bez wycieków
A miało być tak... bez wycieków
 
Malli: inside data-driven schemas
Malli: inside data-driven schemasMalli: inside data-driven schemas
Malli: inside data-driven schemas
 
Самые вкусные баги из игрового кода: как ошибаются наши коллеги-программисты ...
Самые вкусные баги из игрового кода: как ошибаются наши коллеги-программисты ...Самые вкусные баги из игрового кода: как ошибаются наши коллеги-программисты ...
Самые вкусные баги из игрового кода: как ошибаются наши коллеги-программисты ...
 
Presto in Treasure Data (presented at db tech showcase Sapporo 2015)
Presto in Treasure Data (presented at db tech showcase Sapporo 2015)Presto in Treasure Data (presented at db tech showcase Sapporo 2015)
Presto in Treasure Data (presented at db tech showcase Sapporo 2015)
 
Beyond php it's not (just) about the code
Beyond php   it's not (just) about the codeBeyond php   it's not (just) about the code
Beyond php it's not (just) about the code
 
MongoDB World 2019: Event Horizon: Meet Albert Einstein As You Move To The Cloud
MongoDB World 2019: Event Horizon: Meet Albert Einstein As You Move To The CloudMongoDB World 2019: Event Horizon: Meet Albert Einstein As You Move To The Cloud
MongoDB World 2019: Event Horizon: Meet Albert Einstein As You Move To The Cloud
 
Computer science project work on C++
Computer science project work on C++Computer science project work on C++
Computer science project work on C++
 
Regression and Classification with R
Regression and Classification with RRegression and Classification with R
Regression and Classification with R
 
MongoDB Aggregation Framework in action !
MongoDB Aggregation Framework in action !MongoDB Aggregation Framework in action !
MongoDB Aggregation Framework in action !
 
Best Bugs from Games: Fellow Programmers' Mistakes
Best Bugs from Games: Fellow Programmers' MistakesBest Bugs from Games: Fellow Programmers' Mistakes
Best Bugs from Games: Fellow Programmers' Mistakes
 
MongoDB
MongoDBMongoDB
MongoDB
 
TDC2018SP | Trilha .Net - Novidades do C# 7 e 8
TDC2018SP | Trilha .Net - Novidades do C# 7 e 8TDC2018SP | Trilha .Net - Novidades do C# 7 e 8
TDC2018SP | Trilha .Net - Novidades do C# 7 e 8
 
1403 app dev series - session 5 - analytics
1403   app dev series - session 5 - analytics1403   app dev series - session 5 - analytics
1403 app dev series - session 5 - analytics
 
Py conkr 20150829_docker-python
Py conkr 20150829_docker-pythonPy conkr 20150829_docker-python
Py conkr 20150829_docker-python
 

Mais de datablend

Coalition cocktail - hack the elections
Coalition cocktail - hack the electionsCoalition cocktail - hack the elections
Coalition cocktail - hack the electionsdatablend
 
The Power of Graphs to Analyze Biological Data
The Power of Graphs to Analyze Biological DataThe Power of Graphs to Analyze Biological Data
The Power of Graphs to Analyze Biological Datadatablend
 
Introduction to Graph Databases @ SAI
Introduction to Graph Databases @ SAIIntroduction to Graph Databases @ SAI
Introduction to Graph Databases @ SAIdatablend
 
FluxGraph @ GraphDevRoom
FluxGraph @ GraphDevRoomFluxGraph @ GraphDevRoom
FluxGraph @ GraphDevRoomdatablend
 
The power of graphs to analyze biological data
The power of graphs to analyze biological dataThe power of graphs to analyze biological data
The power of graphs to analyze biological datadatablend
 
FluxGraph: a time-machine for your graphs
FluxGraph: a time-machine for your graphsFluxGraph: a time-machine for your graphs
FluxGraph: a time-machine for your graphsdatablend
 
8 things I like about Datomic
8 things I like about Datomic8 things I like about Datomic
8 things I like about Datomicdatablend
 

Mais de datablend (7)

Coalition cocktail - hack the elections
Coalition cocktail - hack the electionsCoalition cocktail - hack the elections
Coalition cocktail - hack the elections
 
The Power of Graphs to Analyze Biological Data
The Power of Graphs to Analyze Biological DataThe Power of Graphs to Analyze Biological Data
The Power of Graphs to Analyze Biological Data
 
Introduction to Graph Databases @ SAI
Introduction to Graph Databases @ SAIIntroduction to Graph Databases @ SAI
Introduction to Graph Databases @ SAI
 
FluxGraph @ GraphDevRoom
FluxGraph @ GraphDevRoomFluxGraph @ GraphDevRoom
FluxGraph @ GraphDevRoom
 
The power of graphs to analyze biological data
The power of graphs to analyze biological dataThe power of graphs to analyze biological data
The power of graphs to analyze biological data
 
FluxGraph: a time-machine for your graphs
FluxGraph: a time-machine for your graphsFluxGraph: a time-machine for your graphs
FluxGraph: a time-machine for your graphs
 
8 things I like about Datomic
8 things I like about Datomic8 things I like about Datomic
8 things I like about Datomic
 

Último

Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost SavingRepurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost SavingEdi Saputra
 
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemkeProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemkeProduct Anonymous
 
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot TakeoffStrategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoffsammart93
 
Top 10 Most Downloaded Games on Play Store in 2024
Top 10 Most Downloaded Games on Play Store in 2024Top 10 Most Downloaded Games on Play Store in 2024
Top 10 Most Downloaded Games on Play Store in 2024SynarionITSolutions
 
A Year of the Servo Reboot: Where Are We Now?
A Year of the Servo Reboot: Where Are We Now?A Year of the Servo Reboot: Where Are We Now?
A Year of the Servo Reboot: Where Are We Now?Igalia
 
The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024Rafal Los
 
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...Miguel Araújo
 
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MIND CTI
 
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, AdobeApidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobeapidays
 
GenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdfGenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdflior mazor
 
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...Principled Technologies
 
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...Drew Madelung
 
Workshop - Best of Both Worlds_ Combine KG and Vector search for enhanced R...
Workshop - Best of Both Worlds_ Combine  KG and Vector search for  enhanced R...Workshop - Best of Both Worlds_ Combine  KG and Vector search for  enhanced R...
Workshop - Best of Both Worlds_ Combine KG and Vector search for enhanced R...Neo4j
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024The Digital Insurer
 
Data Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonData Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonAnna Loughnan Colquhoun
 
Boost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdfBoost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdfsudhanshuwaghmare1
 
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyKhushali Kathiriya
 

Último (20)

Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost SavingRepurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
 
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemkeProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
 
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot TakeoffStrategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
 
Top 10 Most Downloaded Games on Play Store in 2024
Top 10 Most Downloaded Games on Play Store in 2024Top 10 Most Downloaded Games on Play Store in 2024
Top 10 Most Downloaded Games on Play Store in 2024
 
A Year of the Servo Reboot: Where Are We Now?
A Year of the Servo Reboot: Where Are We Now?A Year of the Servo Reboot: Where Are We Now?
A Year of the Servo Reboot: Where Are We Now?
 
The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024
 
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
 
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024
 
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, AdobeApidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
 
GenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdfGenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdf
 
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...
 
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 
Workshop - Best of Both Worlds_ Combine KG and Vector search for enhanced R...
Workshop - Best of Both Worlds_ Combine  KG and Vector search for  enhanced R...Workshop - Best of Both Worlds_ Combine  KG and Vector search for  enhanced R...
Workshop - Best of Both Worlds_ Combine KG and Vector search for enhanced R...
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024
 
Data Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonData Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt Robison
 
Boost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdfBoost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdf
 
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : Uncertainty
 

MongoDB Analytics

  • 5. chemical similarity (1) ★ 31 million compounds available ➡ pubchem ➡ Question: ★ find compounds similar to a particular other compound
  • 6. chemicalsimilarity(2) 0[N]1[C  O]2[C  C  C] 0[N]1[C  O]2[C  C  C]3[C  C  C  C  C] 0[C]1[C  C  C]2[C  C  N  O]3[C  C  C  C  O  O] 0[C]1[C  C]2[C  C  C  C  O]3[C  C  N  O] 0[O]1[C]2[C  O]3[C  C  C] 0[C]1[C  O  O]2[C  C  C  O] 0[C]1[C  C]2[C  C] 0[C]1[C]2[C]3[C  O] 0[C]1[C  C  N]2[C  C  C  C  O]3[C  C  C  O] ...
  • 7. chemicalsimilarity(3) 0[N]1[C  O]2[C  C  C] 0[N]1[C  O]2[C  C  C]3[C  C  C  C  C] 0[C]1[C  C  C]2[C  C  N  O]3[C  C  C  C  O  O] 0[C]1[C  C]2[C  C  C  C  O]3[C  C  N  O] 0[O]1[C]2[C  O]3[C  C  C] 0[C]1[C  O  O]2[C  C  C  O] 0[C]1[C  C]2[C  C] 0[C]1[C]2[C]3[C  O] 0[C]1[C  C  N]2[C  C  C  C  O]3[C  C  C  O] ... 0[N]1[C  O]2[C  C  C]3[C  C  C  C  C  C] 0[C]1[C  C  C]2[C  C  N  O]3[C  C  C  C  O  O] 0[C]1[C  C]2[C  C  C  C  O]3[C  C  N  O] 0[O]1[C]2[C  O]3[C  C  C  C] 0[C]1[C  O  O]2[C  C  C  O] 0[C]1[C  C]2[C  C] 0[N]1[C  O]2[C  C  C] 0[C]1[C]2[C]3[C  O] 0[C]1[C  C  N]2[C  C  C  C  O]3[C  C  C  O] ... equalityviatanimoto but31millioncalculations?
  • 8. mongodbdatamodel(1) {          "compound_cid"  :  "46200001"  ,          "smiles"  :  "CCC1C(C(C(C(=NOCC=CCN2CCCCC2)C(CC(C(C(C(C(C(=O)O1)C)OC3C"  ,        "fingerprint_count"  :  120  ,          "fingerprints"  :  [                  "0[N]1[C  O]2[C  C  C]"  ,                "0[N]1[C  O]2[C  C  C]3[C  C  C  C  C]"  ,                "0[C]1[C  C  C]2[C  C  N  O]3[C  C  C  C  O  O]"  ,                "0[C]1[C  C]2[C  C  C  C  O]3[C  C  N  O]"  ,                "0[O]1[C]2[C  O]3[C  C  C]"  ,                  "0[C]1[C  O  O]2[C  C  C  O]"  ,                  "0[C]1[C  C]2[C  C]"  ,                  "0[C]1[C]2[C]3[C  O]"  ,                  "0[C]1[C  C  N]2[C  C  C  C  O]3[C  C  C  O]"  ,                ...  ]  ,   } compound collection
  • 9. mongodbdatamodel(2) fingerprint collection {          "fingerprint"  :  "0[N]1[C  O]2[C  C  C]",        "count"  :  472 } {          "fingerprint"  :  "0[N]1[C  O]2[C  C  C]3[C  C  C  C  C]",        "count"  :  41 } {        "fingerprint"  :  "0[O]1[C]2[C  O]3[C  C  C]",        "count"  :  1343 }
  • 10. querying pattern (1) ★ from 31 million -> potential match ➡ narrow down the search space ➡ imagine 80% search for a compound with 40 features ➡ 32 ➡ 50
  • 11. querying pattern (2) ★ from 31 million -> potential match ➡ narrow down the search space ➡ imagine 80% search for a compound with 40 features (9 fingerprints)
  • 12. findthefingerprints(1) //  Retrieve  the  particular  compound DBObject  object  =   compoundsCollection.findOne(QueryBuilder.start("compound_cid").is(compound).get()); //  Retrieve  the  relevant  properties String  pubchemcid  =  (String)object.get(COMPOUNDCID_PROPERTY); List<Integer>  fingerprintstofind  =   Arrays.asList(((BasicDBList)object.get(FINGERPRINTS_PROPERTY)).toArray(new   Integer[]{})); //  Sort  the  fingerprints  on  total  number  of  occurences fingerprintstofind  =  findSortedFingerprints(fingerprintstofind);
  • 13. findthefingerprints(2) List<Integer>  sortedFingerprintsToFind  =  new  ArrayList<Integer>();    //  Find  all  fingerprint  count  documents    DBObject  fingerprintcountquery  =          QueryBuilder.start(FINGERPRINT_PROPERTY).in(fingerprintsToFind.toArray()).get();        //  Only  retrieve  the  fingerprint  string  itself    DBObject  fingerprintcountselection  =          QueryBuilder.start(FINGERPRINT_PROPERTY).is(1).get();                    //  Sort  the  result  on  count    DBObject  fingerprintcountsort  =  QueryBuilder.start(COUNT_PROPERTY).is(1).get();    //  Execute  the  query  on  the  fingerprint  counts  collection    DBCursor  fingerprintcounts  =          fingerprintCountsCollection.find(fingerprintcountquery,  fingerprintcountselection).        sort(fingerprintcountsort);
  • 14. nativequery(1) //  Find  the  matching  compounds DBObject  compoundquery  =      QueryBuilder.        start(FINGERPRINTS_PROPERTY).        in(fingerprintsToConsider).        and(FINGERPRINTCOUNT_PROPERTY).lessThanEquals(maxnumberofcompoundfingerprints).        and(FINGERPRINTCOUNT_PROPERTY).greaterThanEquals(minnumberofcompoundfingerprints).        get();
  • 15. nativequery(2) //  Execute  the  query DBCursor  compounds  =  compoundsCollection.find(compoundquery);     //  Let's  process  the  found  compounds  locally while(compounds.hasNext())  {    DBObject  compound  =  compounds.next();    BasicDBList  fingerprints  =  ((BasicDBList)            compound.get(FINGERPRINTS_PROPERTY));        //  Calculate  the  intersection  on  the  total  list  of  fingerprints    fingerprints.retainAll(fingerprintsToFind);    if  (fingerprints.size()  >=  minnumberofcompoundfingerprints)  {        //  Calculate  the  tanimoto  coefficient  ...    } }                                
  • 17. map/reducequery(2)  //  Find  all  compounds DBObject  compoundquery  =  ...   //  The  map  fuction String  map  =  "function()  {    "  +                          "    var  found  =  0;  "  +                          "    var  fingerprintslength  =  this.fingerprints.length;  "  +                          "    for  (i  =  0;  i  <  fingerprintslength;  i++)  {  "  +                          "        if  (fingerprintstofind[this.fingerprints[i]]  ===  true)  {  found++;  }  "  +                          "    }  "  +                          "    if  (found  >=  minnumberofcompoundfingerprints)  {  "  +                          "        emit  (this.compound_cid,  {found  :  found,  "  +                            "                                                            total  :  this.fingerprint_count}  );  }  "  +                          "}"; //  Execute  the  map  reduce  function MapReduceCommand  mr  =  new  MapReduceCommand(compoundsCollection,  map,  "",      MapReduceCommand.OutputType.INLINE,  compoundquery);                              
  • 19. aggregationframework(2) {  "aggregate"  :  "compounds"  ,      "pipeline"  :  [            {  "$match"  :  {  "fingerprint_count"  :  {  "$gte"  :  4  ,  "$lte"  :  1780}}}  ,            {  "$unwind"  :  "$fingerprints"}  ,            {  "$match"  :  {  "fingerprints"  :  {  "$in"  :  [  1960,  15111,  ...,94  ,  26]}}}  ,              {  "$group"  :  {  "_id"  :  "$compound_cid"  ,                                          "fingerprintmatches"  :  {  "$sum"  :  1}  ,                                          "totalcount"  :  {  "$first"  :  "$fingerprint_count"}  }}}  ,              {  "$project"  :  {  "_id"  :  1  ,                                              "tanimoto"  :  {  "$divide"  :  [  "$fingerprintmatches"  ,                                                                        {  "$subtract"  :  [  {  "$add"  :  [  89  ,  "$totalcount"]}  ,                                                                            "$fingerprintmatches"]}]}}  ,            {  "$match"  :  {  "tanimoto"  :  {  "$gte"  :  0.05}}}]}                              
  • 20. benchmark results ★ native -> 202 ms ➡ 100K compounds, 0.8 tanimoto ★ map/reduce -> 214 ms ★ aggregation framework -> 609 ms ★ native -> 1909 ms ➡ 100K compounds, 0.05 tanimoto ★ map/reduce -> 20978 ms ★ aggregation framework -> 1613 ms