diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..920b466 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/build/ +*.d +*.o +/strfry +/strfry-db/*.mdb diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..fd042f5 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "golpe"] + path = golpe + url = https://github.com/hoytech/golpe.git diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f288702 --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. 
Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. 
To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. 
If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. 
This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..708da0b --- /dev/null +++ b/Makefile @@ -0,0 +1,6 @@ +BIN = strfry +OPT = -O3 -g + +include golpe/rules.mk + +LDLIBS += -lsecp256k1 -lb2 diff --git a/README.md b/README.md new file mode 100644 index 0000000..48d1506 --- /dev/null +++ b/README.md @@ -0,0 +1,270 @@ +# strfry - a nostr relay + +strfry is a relay for the [nostr protocol](https://github.com/nostr-protocol/nostr) + +* Supports most applicable NIPs: 1, 9, 11, 12, 15, 16, 20, 22 +* No external database required: All data is stored locally on the filesystem in LMDB +* Hot reloading of config file: No server restart needed for many config param changes +* Websocket compression: permessage-deflate with optional sliding window, when supported by clients +* Built-in support for real-time streaming (up/down/both) events from remote relays, and bulk import/export of events from/to jsonl files +* Merkle-tree based set reconcilliation for efficient syncing with remote relays + +**NOTE**: This project is still in development/testing phase, so you may not want to use it in production yet. + + +## Syncing + +The most original feature of strfry is a set reconcillation protocol based on [Quadrable](https://github.com/hoytech/quadrable). This is implemented over a protocol extension called "yesstr", which is primarily designed for relay-to-relay communication, but could also be used by sophisticated clients. Yesstr allows two parties to synchronise their sets of stored messages with minimal bandwidth overhead. + +Either the full set of messages in the DB can be synced, or the results of one or more nostr filter expressions. If the two parties to the sync share common subsets of identical events, then there will be significant bandwidth savings compared to downloading the full set. + + + +## Usage + +### Compile + +A C++20 compiler is required, along with a few other common dependencies. 
On Debian/Ubuntu use these commands: + + sudo apt install -y git build-essential libyaml-perl libtemplate-perl libssl-dev zlib1g-dev liblmdb-dev libflatbuffers-dev libsecp256k1-dev libb2-dev + git submodule update --init + make setup-golpe + make -j4 + +### Running a relay + +Here is how to run the relay: + + ./strfry relay + +For dev/testing, the config file `./strfry.conf` is used by default. It stores data in the `./strfry-db/` directory. + +In production, you'll probably want a systemd unit file and a reverse proxy such as nginx (details coming soon). + +### Importing data + +The `strfry import` command reads line-delimited JSON (jsonl) from its standard input and imports events that validate into the DB in batches of 10,000 at a time: + + cat my-nostr-dump.jsonl | ./strfry import + +* By default, it will verify the signatures and other fields of the events. If you know the messages are valid, you can speed up the import a bit by passing the `--no-verify` flag. + +### Exporting data + +The `strfry export` command will print events from the DB to standard output in jsonl, ordered by their `created_at` field (ascending). + +Optionally, you can limit the time period exported with the `--since` and `--until` flags. + + +### Stream + +This command opens a websocket connection to the specified relay and makes a nostr `REQ` request with filter `{"limit":0}`: + + ./strfry stream wss://relay.example.com + +All events that are streamed back are inserted into the DB (after validation, checking for duplicates, etc). If the connection is closed for any reason, the command will try reconnecting every 5 seconds. 
+ +You can also run it in the opposite direction, which monitors your local DB for any new events and posts them to the specified relay: + + ./strfry stream wss://relay.example.com --dir up + +Both of these operations can be concurrently multiplexed over the same websocket: + + ./strfry stream wss://relay.example.com --dir both + +`strfry stream` will compress messages with permessage-deflate in both directions, if supported by the server. Sliding window is not supported for now. + + +### Sync + +This command uses the yesstr protocol and performs a merkle-tree set reconciliation against the specified relay. + +Effectively what this does is figure out which events the remote relay has that you don't, and vice versa. Assuming that you both have common subsets of events, it does this more efficiently than simply transferring the full set of events (or even just their ids). + +You can read about the algorithm used on the [Quadrable project page](https://github.com/hoytech/quadrable#syncing). For now, the only implementation is in C++, although we plan on compiling this into WASM so the protocol can also be used by JS clients. + +Here is how to perform a "full DB" set reconciliation against a remote server: + + ./strfry sync wss://relay.example.com + +This will download all missing events from the remote relay and insert them into your DB. Similar to `stream`, you can also sync in the `up` or `both` directions (not implemented yet, coming soon): + + ./strfry sync wss://relay.example.com --dir both ## coming soon + +`both` is especially efficient, because performing the set reconciliation automatically determines the missing members on each side. 
+ +Instead of a "full DB" sync, you can also sync the result of a nostr filter (or multiple filters, use a JSON array of them): + + ./strfry sync wss://relay.example.com '{"authors":["003b"]}' + +Because many messages can be batched into a single yesstr websocket message, permessage-deflate compression can also make syncing more bandwidth-efficient when bulk-loading data over the network compared to regular nostr. + +Warning: Syncing can consume a lot of memory and bandwidth if the DBs are highly divergent (for example if your local DB is empty and your filter matches many events). The sync doesn't begin to commit received events to your DB until it has downloaded the entire set (but it is possible to improve this). + + + +## Architecture + +strfry uses concepts from various proprietary systems I have worked on in the past but consists solely of independently-developed open source code. + +The [golpe](https://github.com/hoytech/golpe) application framework is used for basic services such as command-line arg parsing, logging, config files, etc. + +### Database + +strfry is built on the embedded [LMDB](https://www.symas.com/lmdb) database (using the [lmdbxx](https://github.com/hoytech/lmdbxx/) C++ interface). This means that records are accessed directly from the page cache. The read data-path requires no locking/system calls and it scales optimally with additional cores. + +Database records are serialised with [Flatbuffers](https://google.github.io/flatbuffers/) serialisation, which allows fast and zero-copy access to individual fields within the records. A [RasgueaDB](https://github.com/hoytech/rasgueadb) layer is used for maintaining indices and executing queries. + +The query engine is quite a bit less flexible than a general-purpose SQL engine, however the types of queries that can be performed via the nostr protocol are fairly constrained, so we can ensure that almost all queries have good index support. 
All possible query plans are determined at compile-time, so there is no SQL generation/parsing overhead, or risk of SQL injection. + +When an event is inserted, indexable data (id, pubkey, tags, kind, and created_at) is loaded into a flatbuffers object. Signatures and non-indexed tags are removed, along with recommended relay fields, etc, to keep the record size minimal (and therefore improve cache usage). The full event's raw JSON is stored separately. + +Various indices are created based on the indexed fields. Almost all indices are "clustered" with the event's `created_at` timestamp, allowing efficient `since`/`until` scans. Many queries can be serviced by index-only scans, and don't need to load the flatbuffers object at all. + +I've tried to build the query engine with efficiency and performance in mind, but it is possible a SQL engine could find better execution plans, perhaps depending on the query. I haven't done any benchmarking or profiling yet, so your mileage may vary. + +One benefit of a custom query engine is that we have the flexibility to optimise it for real-time streaming use-cases more than we could a general-purpose DB. For example, a user on a slow connection should not unnecessarily tie up resources. Our query engine supports pausing a query and storing it (it takes up a few hundred to a few thousand bytes, depending on query complexity), and resuming it later when the client's socket buffer has drained. Additionally, we can pause long-running queries to satisfy new queries as quickly as possible. This is all done without any database thread pools. There *are* worker threads, but they only exist to take advantage of multiple CPUs, not to block on client I/O. + + +### Threads and Inboxes + +strfry starts multiple OS threads that communicate with each other via two channels: + +* Non-copying message queues +* The LMDB database + +This means that no in-memory data-structures are accessed concurrently. 
This is sometimes called "shared nothing" architecture. + +Each individual thread has an "inbox". Typically a thread will block waiting for a batch of messages to arrive in its inbox, process them, queue up new messages in the inboxes of other threads, and repeat. + +### Websocket + +This thread is responsible for accepting new websocket connections, routing incoming requests to the Ingesters, and replying with responses. + +The Websocket thread is a single thread that multiplexes IO to/from multiple connections using the most scalable OS-level interface available (for example, epoll on Linux). It uses [my fork of uWebSockets](https://github.com/hoytech/uWebSockets). + +Since there is only one of these threads, it is critical for system latency that it perform as little CPU-intensive work as possible. No request parsing or JSON encoding/decoding is done on this thread, nor any DB operations. + +The Websocket thread does however handle compression and TLS, if configured. In production it is recommended to terminate TLS before strfry, for example with nginx. + +#### Compression + +If supported by the client, compression can reduce bandwidth consumption and improve latency. + +Compression can run in two modes, either "per-message" or "sliding-window". Per-message uses much less memory, but it cannot take advantage of cross-message redundancy. Sliding-window uses more memory for each client, but the compression is typically better since nostr messages contain serial redundancy (subIds, repeated pubkeys and event IDs in subsequent messages, etc). + +The CPU usage of compression is typically small enough to make it worth it. However, strfry also supports running multiple independent strfry instances on the same machine (using the same DB backing store). This can distribute the compression overhead over several threads, according to the kernel's `REUSE_PORT` policy. 
+ +### Ingester + +These threads perform the CPU-intensive work of processing incoming messages: + +* Decoding JSON +* Validating and hashing new events +* Verifying event signatures +* Compiling filters + +A particular connection's requests are always routed to the same ingester. + +### Writer + +This thread is responsible for most DB writes: + +* Adding new events to the DB +* Maintaining the Quadrable merkle tree +* Performing event deletion (NIP-09) +* Deleting replaceable events (NIP-16) + +It is important there is only 1 writer thread, because LMDB has an exclusive-write lock, so multiple writers would imply contention. Additionally, when multiple events queue up, there is work that can be amortised across the batch. This serves as a natural counterbalance against high write volumes. + +### ReqWorker + +Incoming `REQ` messages have two stages. The first stage is retrieving "old" data that already existed in the DB at the time of the request. + +Servicing this stage is the job of the ReqWorker thread pool. Like Ingester, messages are consistently delivered to a thread according to connection ID. This is important so that (for example) CLOSE messages are matched with corresponding REQs. + +When this stage is complete the next stage (monitoring) begins. When a ReqWorker thread completes the first stage for a subscription, the subscription is then sent to a ReqMonitor thread. ReqWorker is also responsible for forwarding unsubscribe (`CLOSE`) and socket disconnection messages to ReqMonitor. This forwarding is necessary to avoid a race condition where a message closing a subscription would be delivered while that subscription is pending in the ReqMonitor thread's inbox. + +#### Filters + +In nostr, each `REQ` message from a subscriber can contain multiple filters. We call this collection a `FilterGroup`. If one or more of the filters in the group matches an event, that event should be sent to the subscriber. + +A `FilterGroup` is a vector of `Filter` objects. 
When the Ingester receives a `REQ`, the JSON filter items are compiled into `Filter`s and the original JSON is discarded. Each filter item's specified fields are compiled into sorted lookup tables called filter sets. + +In order to determine if an event matches against a `Filter`, first the `since` and `until` fields are checked. Then, each field of the event for which a filter item was specified is looked up in the corresponding lookup table. Specifically, the upper-bound index is determined using a binary search (for example `std::upper_bound`). This is the first element greater than the event's item. Then the preceding table item is checked for either a prefix (`ids`/`authors`) or exact (everything else) match. + +Since testing `Filter`s against events is performed so frequently, it is a performance-critical operation and some optimisations have been applied. For example, each filter item in the lookup table is represented by a 4 byte data structure, one of which is the first byte of the field and the rest are offset/size lookups into a single memory allocation containing the remaining bytes. Under typical scenarios, this will greatly reduce the amount of memory that needs to be loaded to process a filter. Filters with 16 or fewer items can often be rejected with the load of a single cache line. Because filters aren't scanned linearly, the number of items in a filter (ie amount of pubkeys) doesn't have a significant impact on processing resources. + +#### DBScan + +The DB querying engine used by ReqWorker is called `DBScan`. This engine is designed to take advantage of indices that have been added to the database. The indices have been selected so that no filters require full table scans (over the `created_at` index), except ones that only use `since`/`until` (or nothing). + +Because events are stored in the same flatbuffers format in memory and "in the database" (there isn't really any difference with LMDB), compiled filters can be applied to either. 
+ +When a user's `REQ` is being processed for the initial "old" data, each `Filter` in its `FilterGroup` is analysed and the best index is determined according to a simple heuristic. For each filter item in the `Filter`, the index is scanned backwards starting at the upper-bound of that filter item. Because all indices are composite keyed with `created_at`, the scanner also jumps to the `until` time when possible. Each event is compared against the compiled `Filter` and, if it matches, sent to the Websocket thread to be sent to the subscriber. The scan completes when one of the following is true: + +* The key no longer matches the filter item (exact or prefix, depending on field) +* The event's `created_at` is before the `since` filter field +* The filter's `limit` field of delivered events has been reached + +Once this completes, a scan begins for the next item in the filter field. Note that a filter only ever uses one index. If a filter specifies both `ids` and `authors`, only the `ids` index will be scanned. The `authors` filters will be applied when the whole filter is matched prior to sending. + +An important property of `DBScan` is that queries can be paused and resumed with minimal overhead. This allows us to ensure that long-running queries don't negatively affect the latency of short-running queries. When ReqWorker first receives a query, it creates a DBScan for it. The scan will be run with a "time budget" (for example 10 milliseconds). If this is exceeded, the query is put to the back of a queue and new queries are checked for. This means that new queries will always be processed before resuming any queries that have already run for 10ms. + + +### ReqMonitor + +The second stage of a REQ request is comparing newly-added events against the REQ's filters. If they match, the event should be sent to the subscriber. + +ReqMonitor is not directly notified when new events have been written. This is important because new events can be added in a variety of ways. 
For instance, the `strfry import` command, event syncing, and multiple independent strfry servers using the same DB (ie, `REUSE_PORT`). + +Instead, ReqMonitor watches for file change events using the OS's inotify API. When the file has changed, it scans all the events that were added to the DB since the last time it ran. + +Note that because of this design decision, ephemeral events work differently than in other relay implementations. They *are* stored to the DB, however they have a very short retention-policy lifetime and will be deleted after 5 minutes (by default). + +#### ActiveMonitors + +Even though filter scanning is quite fast, strfry further attempts to optimise the case where a large number of concurrent REQs need to be monitored for. + +When ReqMonitor first receives a subscription, it first compares its filter group against all the events that have been written since the subscription's DBScan started (since those are omitted from DBScan). + +After the subscription is all caught up to the current transaction's snapshot, the filter group is broken up into its individual filters, and then each filter has one field selected (because all fields in a query must have a match, it is sufficient to choose one). This field is broken up into its individual filter items (ie a list of `ids`) and these are added to a sorted data-structure called a monitor set. + +Whenever a new event is processed, all of its fields are looked up in the various monitor sets, which provides a list of filters that should be fully processed to check for a match. If an event has no fields in common with a filter, a match will not be attempted for this filter. + +For example, for each prefix in the `authors` field in a filter, an entry is added to the `allAuthors` monitor set. When a new event is subsequently detected, the `pubkey` is looked up in `allAuthors` according to a binary search. 
Then the data-structure is scanned until it stops seeing records that are prefix matches against the `pubkey`. All of these matching records are pointers to corresponding `Filter`s of the REQs that have subscribed to this author. The filters must then be processed to determine if the event satisfies the other parameters of each filter (`since`/`until`/etc). + +After comparing the event against each filter detected via the inverted index, that filter is marked as "up-to-date" with this event's ID, whether the filter matched or not. This prevents needlessly re-comparing this index against the same event in the future (in case one of the *other* index lookups matches it). If a filter *does* match, then the entire filter group is marked as up-to-date. This prevents sending the same event multiple times in case multiple filters in a filter group match, and also prevents needlessly comparing other filters in the group against an event that has already been sent. + +After an event has been processed, all the matching connections and subscription IDs are sent to the Websocket thread along with a single copy of the event's JSON. This prevents intermediate memory bloat that would occur if a copy was created for each subscription. + + +### Yesstr + +This thread implements the provider-side of the Quadrable [syncing protocol](https://github.com/hoytech/quadrable#syncing). More details coming soon... + + +### Cron + +This thread is responsible for periodic maintenance operations. Currently this consists of applying a retention-policy and deleting ephemeral events. + + +## Testing + +The query engine is the most complicated part of the relay, so there is a differential fuzzing test framework to exercise it. + +To bootstrap the tests, we load in a set of [real-world nostr events](https://wiki.wellorder.net/wiki/nostr-datasets/). + +There is a simple but inefficient filter implementation in `test/dumbFilter.pl` that can be used to check if an event matches a filter. 
In a loop, we randomly generate a complicated filter group and pipe the entire DB's worth of events through the dumb filter and record which events it matched. Next, we perform the query using strfry's query engine (using a `strfry scan`) and ensure it matches. This gives us confidence that querying for "old" records in the DB will be performed correctly. + +Next, we need to verify that monitoring for "new" records will function also. For this, in a loop we create a set of hundreds of random filters and install them in the monitoring engine. One of which is selected as a sample. The entire DB's worth of events is "posted to the relay" (actually just iterated over in the DB using `strfry monitor`), and we record which events were matched. This is then compared against a full-DB scan using the same query. + +Both of these tests have run for several hours with no observed failures. + + + +## Author and Copyright + +strfry © 2023 Doug Hoyte. + +GPLv3 license. See the LICENSE file. diff --git a/TODO b/TODO new file mode 100644 index 0000000..971ef7d --- /dev/null +++ b/TODO @@ -0,0 +1,25 @@ +features + finish syncing + * logging of bytes up/down + * up/both directions + * error handling and reporting + * way to close sync request + * limit on number of concurrent sync requests + * full-db scan limited by since/until + config for compression + less verbose default logging + nice new config "units" feature, is 1d instead of 86400 + make it easier for a thread to setup a quadrable object + opt: PubkeyKind scans could be done index-only + +rate limits + slow-reader detection and back-pressure + max connections per ip (nginx?) + max bandwidth up/down (nginx?) + event writes per second per ip + max number of concurrent REQs per connection/ip + ? limit on total number of events from a DBScan, not just per filter + +misc + periodic reaping of disconnected sockets + ? 
websocket-level pings diff --git a/fbs/nostr-index.fbs b/fbs/nostr-index.fbs new file mode 100644 index 0000000..e5d5858 --- /dev/null +++ b/fbs/nostr-index.fbs @@ -0,0 +1,17 @@ +namespace NostrIndex; + +table Tag { + key: uint8; + val: [ubyte]; +} + +table Event { + id: [ubyte]; + pubkey: [ubyte]; + created_at: uint64; + kind: uint64; + tags: [Tag]; +} + +table Empty {} +root_type Empty; diff --git a/fbs/yesstr.fbs b/fbs/yesstr.fbs new file mode 100644 index 0000000..e8a2eaa --- /dev/null +++ b/fbs/yesstr.fbs @@ -0,0 +1,41 @@ +namespace Yesstr; + + + +// FilterSync + +table RequestSync { + filter: string; // only required for first in a sequence + reqsEncoded: [ubyte]; +} + +table ResponseSync { + respsEncoded: [ubyte]; +} + + + +// Request/Response wrappers + +union RequestPayload { + RequestSync, +} + +union ResponsePayload { + ResponseSync, +} + +table Request { + requestId: uint64; + payload: RequestPayload; +} + +table Response { + requestId: uint64; + payload: ResponsePayload; +} + + + +table Empty {} +root_type Empty; diff --git a/golpe b/golpe new file mode 160000 index 0000000..aa89358 --- /dev/null +++ b/golpe @@ -0,0 +1 @@ +Subproject commit aa893584fa05ab255407e5ed38e840354ad11d5d diff --git a/golpe.yaml b/golpe.yaml new file mode 100644 index 0000000..3ca4a67 --- /dev/null +++ b/golpe.yaml @@ -0,0 +1,110 @@ +appName: strfry + +quadrable: true + +flatBuffers: | + include "../fbs/nostr-index.fbs"; + +tables: + Event: + tableId: 1 + + primaryKey: quadId + + fields: + - name: quadId + - name: receivedAt # microseconds + - name: flat + type: ubytes + nestedFlat: NostrIndex.Event + + indices: + created_at: + integer: true + id: + comparator: StringUint64 + pubkey: + comparator: StringUint64 + kind: + comparator: Uint64Uint64 + pubkeyKind: + comparator: StringUint64Uint64 + tag: + comparator: StringUint64 + multi: true + deletion: # eventId, pubkey + multi: true + + indexPrelude: | + auto *flat = v.flat_nested(); + created_at = flat->created_at(); + uint64_t 
indexTime = *created_at; + + id = makeKey_StringUint64(sv(flat->id()), indexTime); + pubkey = makeKey_StringUint64(sv(flat->pubkey()), indexTime); + kind = makeKey_Uint64Uint64(flat->kind(), indexTime); + pubkeyKind = makeKey_StringUint64Uint64(sv(flat->pubkey()), flat->kind(), indexTime); + + for (const auto &tagPair : *(flat->tags())) { + auto tagName = (char)tagPair->key(); + auto tagVal = sv(tagPair->val()); + tag.push_back(makeKey_StringUint64(std::string(1, tagName) + std::string(tagVal), indexTime)); + if (flat->kind() == 5 && tagName == 'e') deletion.push_back(std::string(tagVal) + std::string(sv(flat->pubkey()))); + } + +config: + - name: db + default: "./strfry-db/" + noReload: true + + - name: relay__port + default: 7777 + noReload: true + - name: relay__bind + default: "127.0.0.1" + noReload: true + + - name: relay__info__name + default: "strfry default" + - name: relay__info__description + default: "This is a strfry instance." + - name: relay__info__pubkey + default: "unset" + - name: relay__info__contact + default: "unset" + + - name: relay__numThreads__ingester + default: 3 + noReload: true + - name: relay__numThreads__reqWorker + default: 3 + noReload: true + - name: relay__numThreads__reqMonitor + default: 3 + noReload: true + - name: relay__numThreads__yesstr + default: 1 + noReload: true + + - name: relay__maxWebsocketPayloadSize + default: 131072 + noReload: true + - name: relay__queryTimesliceBudgetMicroseconds + default: 10000 + - name: relay__maxFilterLimit + default: 500 + + - name: events__rejectEventsNewerThanSeconds + default: 900 # 15 mins + - name: events__rejectEventsOlderThanSeconds + default: 604800 # 1 week + - name: events__rejectEphemeralEventsOlderThanSeconds + default: 60 + - name: events__ephemeralEventsLifetimeSeconds + default: 300 + - name: events__maxEventSize + default: 65536 + - name: events__maxNumTags + default: 250 + - name: events__maxTagValSize + default: 128 diff --git a/src/ActiveMonitors.h b/src/ActiveMonitors.h 
new file mode 100644 index 0000000..68823b0 --- /dev/null +++ b/src/ActiveMonitors.h @@ -0,0 +1,227 @@ +#pragma once + +#include "golpe.h" + +#include "Subscription.h" +#include "filters.h" + + + +struct ActiveMonitors : NonCopyable { + private: + struct Monitor : NonCopyable { + Subscription sub; + + Monitor(Subscription &sub_) : sub(std::move(sub_)) {} + }; + + using ConnMonitor = std::map; + std::map conns; // connId -> subId -> Monitor + + struct MonitorItem { + Monitor *mon; + uint64_t latestEventId; + }; + + using MonitorSet = std::map; // FIXME: flat_map here + std::map allIds; + std::map allAuthors; + std::map allTags; + std::map allKinds; + MonitorSet allOthers; + + + public: + void addSub(lmdb::txn &txn, Subscription &&sub, uint64_t currEventId) { + if (sub.latestEventId != currEventId) throw herr("sub not up to date"); + + { + auto *existing = findMonitor(sub.connId, sub.subId); + if (existing) removeSub(sub.connId, sub.subId); + } + + auto res = conns.try_emplace(sub.connId); + auto &connMonitors = res.first->second; + + auto subId = sub.subId; + auto *m = &connMonitors.try_emplace(subId, sub).first->second; + + installLookups(m, currEventId); + } + + void removeSub(uint64_t connId, const SubId &subId) { + auto *monitor = findMonitor(connId, subId); + if (!monitor) return; + + uninstallLookups(monitor); + + conns[connId].erase(subId); + if (conns[connId].empty()) conns.erase(connId); + } + + void closeConn(uint64_t connId) { + auto f1 = conns.find(connId); + if (f1 == conns.end()) return; + + for (auto &[k, v] : f1->second) uninstallLookups(&v); + + conns.erase(connId); + } + + void process(lmdb::txn &txn, defaultDb::environment::View_Event &ev, std::function cb) { + RecipientList recipients; + + auto processMonitorSet = [&](MonitorSet &ms){ + for (auto &[f, item] : ms) { + if (item.latestEventId >= ev.primaryKeyId || item.mon->sub.latestEventId >= ev.primaryKeyId) continue; + item.latestEventId = ev.primaryKeyId; + + if (f->doesMatch(ev.flat_nested())) 
{ + recipients.emplace_back(item.mon->sub.connId, item.mon->sub.subId); + item.mon->sub.latestEventId = ev.primaryKeyId; + continue; + } + } + }; + + auto processMonitorsPrefix = [&](std::map &m, const std::string &key, std::function matches){ + auto it = m.lower_bound(key.substr(0, 1)); + + if (it == m.end()) return; + + while (it != m.end() && it->first[0] == key[0]) { + if (matches(it->first)) processMonitorSet(it->second); + it = std::next(it); + } + }; + + auto processMonitorsExact = [&](std::map &m, const T &key, std::function matches){ + auto it = m.upper_bound(key); + + if (it == m.begin()) return; + it = std::prev(it); + + while (matches(it->first)) { + processMonitorSet(it->second); + if (it == m.begin()) break; + it = std::prev(it); + } + }; + + auto *flat = ev.flat_nested(); + + { + auto id = std::string(sv(flat->id())); + processMonitorsPrefix(allIds, id, static_cast>([&](const std::string &val){ + return id.starts_with(val); + })); + } + + { + auto pubkey = std::string(sv(flat->pubkey())); + processMonitorsPrefix(allAuthors, pubkey, static_cast>([&](const std::string &val){ + return pubkey.starts_with(val); + })); + } + + for (const auto &tag : *flat->tags()) { + // FIXME: can avoid this allocation: + auto tagSpec = std::string(1, (char)tag->key()) + std::string(sv(tag->val())); + + processMonitorsExact(allTags, tagSpec, static_cast>([&](const std::string &val){ + return tagSpec == val; + })); + } + + { + auto kind = flat->kind(); + processMonitorsExact(allKinds, kind, static_cast>([&](const uint64_t &val){ + return kind == val; + })); + } + + processMonitorSet(allOthers); + + if (recipients.size()) { + cb(std::move(recipients), ev.primaryKeyId); + } + } + + + private: + Monitor *findMonitor(uint64_t connId, const SubId &subId) { + auto f1 = conns.find(connId); + if (f1 == conns.end()) return nullptr; + + auto f2 = f1->second.find(subId); + if (f2 == f1->second.end()) return nullptr; + + return &f2->second; + } + + void installLookups(Monitor *m, 
uint64_t currEventId) { + for (auto &f : m->sub.filterGroup.filters) { + if (f.ids.size()) { + for (size_t i = 0; i < f.ids.size(); i++) { + auto res = allIds.try_emplace(f.ids.at(i)); + res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); + } + } else if (f.authors.size()) { + for (size_t i = 0; i < f.authors.size(); i++) { + auto res = allAuthors.try_emplace(f.authors.at(i)); + res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); + } + } else if (f.tags.size()) { + for (const auto &[tagName, filterSet] : f.tags) { + for (size_t i = 0; i < filterSet.size(); i++) { + std::string tagSpec = std::string(1, tagName) + filterSet.at(i); + auto res = allTags.try_emplace(tagSpec); + res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); + } + } + } else if (f.kinds.size()) { + for (size_t i = 0; i < f.kinds.size(); i++) { + auto res = allKinds.try_emplace(f.kinds.at(i)); + res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); + } + } else { + allOthers.try_emplace(&f, MonitorItem{m, currEventId}); + } + } + } + + void uninstallLookups(Monitor *m) { + for (auto &f : m->sub.filterGroup.filters) { + if (f.ids.size()) { + for (size_t i = 0; i < f.ids.size(); i++) { + auto &monSet = allIds.at(f.ids.at(i)); + monSet.erase(&f); + if (monSet.empty()) allIds.erase(f.ids.at(i)); + } + } else if (f.authors.size()) { + for (size_t i = 0; i < f.authors.size(); i++) { + auto &monSet = allAuthors.at(f.authors.at(i)); + monSet.erase(&f); + if (monSet.empty()) allAuthors.erase(f.authors.at(i)); + } + } else if (f.tags.size()) { + for (const auto &[tagName, filterSet] : f.tags) { + for (size_t i = 0; i < filterSet.size(); i++) { + std::string tagSpec = std::string(1, tagName) + filterSet.at(i); + auto &monSet = allTags.at(tagSpec); + monSet.erase(&f); + if (monSet.empty()) allTags.erase(tagSpec); + } + } + } else if (f.kinds.size()) { + for (size_t i = 0; i < f.kinds.size(); i++) { + auto &monSet = allKinds.at(f.kinds.at(i)); + monSet.erase(&f); + if 
(monSet.empty()) allKinds.erase(f.kinds.at(i)); + } + } else { + allOthers.erase(&f); + } + } + } +}; diff --git a/src/DBScan.h b/src/DBScan.h new file mode 100644 index 0000000..ae2f33d --- /dev/null +++ b/src/DBScan.h @@ -0,0 +1,328 @@ +#pragma once + +#include "golpe.h" + +#include "Subscription.h" +#include "filters.h" + + +struct DBScan { + const NostrFilter &f; + uint64_t remainingLimit; + + struct NullState { + }; + + struct IdScan { + size_t index = 0; + std::string prefix; + }; + + struct PubkeyKindScan { + size_t indexAuthor = 0; + size_t indexKind = 0; + std::string prefix; + }; + + struct PubkeyScan { + size_t index = 0; + std::string prefix; + }; + + struct TagScan { + std::map::const_iterator indexTagName; + size_t indexTagVal = 0; + std::string search; + }; + + struct KindScan { + size_t index = 0; + uint64_t kind; + }; + + struct CreatedAtScan { + bool done = false; + }; + + std::variant scanState = NullState{}; + lmdb::dbi indexDbi; + std::string resumeKey; + uint64_t resumeVal; + + std::function isComplete; + std::function nextFilterItem; + std::function resetResume; + std::function keyMatch; + + DBScan(const NostrFilter &f_) : f(f_) { + remainingLimit = f.limit; + + if (f.ids.size()) { + LI << "ID Scan"; + + scanState = IdScan{}; + auto *state = std::get_if(&scanState); + indexDbi = env.dbi_Event__id; + + isComplete = [&, state]{ + return state->index >= f.ids.size(); + }; + nextFilterItem = [&, state]{ + state->index++; + }; + resetResume = [&, state]{ + state->prefix = f.ids.at(state->index); + resumeKey = padBytes(state->prefix, 32 + 8, '\xFF'); + resumeVal = MAX_U64; + }; + keyMatch = [&, state](std::string_view k, bool&){ + return k.starts_with(state->prefix); + }; + } else if (f.authors.size() && f.kinds.size()) { + LI << "PubkeyKind Scan"; + + scanState = PubkeyKindScan{}; + auto *state = std::get_if(&scanState); + indexDbi = env.dbi_Event__pubkeyKind; + + isComplete = [&, state]{ + return state->indexAuthor >= f.authors.size(); + }; + 
nextFilterItem = [&, state]{ + state->indexKind++; + if (state->indexKind >= f.kinds.size()) { + state->indexAuthor++; + state->indexKind = 0; + } + }; + resetResume = [&, state]{ + state->prefix = f.authors.at(state->indexAuthor); + if (state->prefix.size() == 32) state->prefix += lmdb::to_sv(f.kinds.at(state->indexKind)); + resumeKey = padBytes(state->prefix, 32 + 8 + 8, '\xFF'); + resumeVal = MAX_U64; + }; + keyMatch = [&, state](std::string_view k, bool &skipBack){ + if (!k.starts_with(state->prefix)) return false; + if (state->prefix.size() == 32 + 8) return true; + + ParsedKey_StringUint64Uint64 parsedKey(k); + if (parsedKey.n1 <= f.kinds.at(state->indexKind)) return true; + + resumeKey = makeKey_StringUint64Uint64(parsedKey.s, f.kinds.at(state->indexKind), MAX_U64); + resumeVal = MAX_U64; + skipBack = true; + return false; + }; + } else if (f.authors.size()) { + LI << "Pubkey Scan"; + + scanState = PubkeyScan{}; + auto *state = std::get_if(&scanState); + indexDbi = env.dbi_Event__pubkey; + + isComplete = [&, state]{ + return state->index >= f.authors.size(); + }; + nextFilterItem = [&, state]{ + state->index++; + }; + resetResume = [&, state]{ + state->prefix = f.authors.at(state->index); + resumeKey = padBytes(state->prefix, 32 + 8, '\xFF'); + resumeVal = MAX_U64; + }; + keyMatch = [&, state](std::string_view k, bool&){ + return k.starts_with(state->prefix); + }; + } else if (f.tags.size()) { + LI << "Tag Scan"; + + scanState = TagScan{f.tags.begin()}; + auto *state = std::get_if(&scanState); + indexDbi = env.dbi_Event__tag; + + isComplete = [&, state]{ + return state->indexTagName == f.tags.end(); + }; + nextFilterItem = [&, state]{ + state->indexTagVal++; + if (state->indexTagVal >= state->indexTagName->second.size()) { + state->indexTagName = std::next(state->indexTagName); + state->indexTagVal = 0; + } + }; + resetResume = [&, state]{ + state->search = state->indexTagName->first; + state->search += state->indexTagName->second.at(state->indexTagVal); + 
resumeKey = state->search + std::string(8, '\xFF'); + resumeVal = MAX_U64; + }; + keyMatch = [&, state](std::string_view k, bool&){ + return k.substr(0, state->search.size()) == state->search; + }; + } else if (f.kinds.size()) { + LI << "Kind Scan"; + + scanState = KindScan{}; + auto *state = std::get_if(&scanState); + indexDbi = env.dbi_Event__kind; + + isComplete = [&, state]{ + return state->index >= f.kinds.size(); + }; + nextFilterItem = [&, state]{ + state->index++; + }; + resetResume = [&, state]{ + state->kind = f.kinds.at(state->index); + resumeKey = std::string(lmdb::to_sv(state->kind)) + std::string(8, '\xFF'); + resumeVal = MAX_U64; + }; + keyMatch = [&, state](std::string_view k, bool&){ + ParsedKey_Uint64Uint64 parsedKey(k); + return parsedKey.n1 == state->kind; + }; + } else { + LI << "CreatedAt Scan"; + + scanState = CreatedAtScan{}; + auto *state = std::get_if(&scanState); + indexDbi = env.dbi_Event__created_at; + + isComplete = [&, state]{ + return state->done; + }; + nextFilterItem = [&, state]{ + state->done = true; + }; + resetResume = [&, state]{ + resumeKey = std::string(8, '\xFF'); + resumeVal = MAX_U64; + }; + keyMatch = [&, state](std::string_view k, bool&){ + return true; + }; + } + } + + // If scan is complete, returns true + bool scan(lmdb::txn &txn, std::function handleEvent, std::function doPause) { + while (remainingLimit && !isComplete()) { + if (resumeKey == "") resetResume(); + + bool pause = false, skipBack = false; + + env.generic_foreachFull(txn, indexDbi, resumeKey, lmdb::to_sv(resumeVal), [&](auto k, auto v) { + if (doPause()) { + resumeKey = std::string(k); + resumeVal = lmdb::from_sv(v); + LI << "SAVING resumeKey: " << to_hex(resumeKey) << " / " << resumeVal; + pause = true; + return false; + } + + if (!keyMatch(k, skipBack)) return false; + + uint64_t created; + + { + ParsedKey_StringUint64 parsedKey(k); + created = parsedKey.n; + + if ((f.since && created < f.since)) { + resumeKey = makeKey_StringUint64(parsedKey.s, 0); + 
resumeVal = 0; + skipBack = true; + return false; + } + + if (f.until && created > f.until) { + resumeKey = makeKey_StringUint64(parsedKey.s, f.until); + resumeVal = MAX_U64; + skipBack = true; + return false; + } + } + + bool sent = false; + uint64_t quadId = lmdb::from_sv(v); + + if (f.indexOnlyScans) { + if (f.doesMatchTimes(created)) { + handleEvent(quadId); + sent = true; + } + } else { + auto view = env.lookup_Event(txn, quadId); + if (!view) throw herr("missing event from index, corrupt DB?"); + if (f.doesMatch(view->flat_nested())) { + handleEvent(quadId); + sent = true; + } + } + + if (sent) { + if (remainingLimit) remainingLimit--; + if (!remainingLimit) return false; + } + + return true; + }, true); + + if (pause) return false; + + if (!skipBack) { + nextFilterItem(); + resumeKey = ""; + } + } + + return true; + } + + std::string padBytes(std::string_view str, size_t n, char padChar) { + if (str.size() > n) throw herr("unable to pad, string longer than expected"); + return std::string(str) + std::string(n - str.size(), padChar); + } +}; + + +struct DBScanQuery : NonCopyable { + Subscription sub; + std::unique_ptr scanner; + + size_t filterGroupIndex = 0; + bool dead = false; + std::unordered_set alreadySentEvents; // FIXME: flat_set here, or roaring bitmap/judy/whatever + + DBScanQuery(Subscription &sub_) : sub(std::move(sub_)) {} + + // If scan is complete, returns true + bool process(lmdb::txn &txn, uint64_t timeBudgetMicroseconds, std::function cb) { + uint64_t startTime = hoytech::curr_time_us(); + + while (filterGroupIndex < sub.filterGroup.size()) { + if (!scanner) scanner = std::make_unique(sub.filterGroup.filters[filterGroupIndex]); + + bool complete = scanner->scan(txn, [&](uint64_t quadId){ + // If this event came in after our query began, don't send it. It will be sent after the EOSE. 
+ if (quadId > sub.latestEventId) return; + + // We already sent this event + if (alreadySentEvents.find(quadId) != alreadySentEvents.end()) return; + alreadySentEvents.insert(quadId); + + cb(sub, quadId); + }, [&]{ + return hoytech::curr_time_us() - startTime > timeBudgetMicroseconds; + }); + + if (!complete) return false; + + filterGroupIndex++; + scanner.reset(); + } + + return true; + } +}; diff --git a/src/RelayCron.cpp b/src/RelayCron.cpp new file mode 100644 index 0000000..b9e20ec --- /dev/null +++ b/src/RelayCron.cpp @@ -0,0 +1,72 @@ +#include "RelayServer.h" + + +void RelayServer::cleanupOldEvents() { + struct EventDel { + uint64_t nodeId; + uint64_t deletedNodeId; + }; + + std::vector expiredEvents; + + { + auto txn = env.txn_ro(); + + auto mostRecent = getMostRecentEventId(txn); + uint64_t cutoff = hoytech::curr_time_s() - cfg().events__ephemeralEventsLifetimeSeconds; + uint64_t currKind = 20'000; + + while (currKind < 30'000) { + uint64_t numRecs = 0; + + env.generic_foreachFull(txn, env.dbi_Event__kind, makeKey_Uint64Uint64(currKind, 0), lmdb::to_sv(0), [&](auto k, auto v) { + numRecs++; + ParsedKey_Uint64Uint64 parsedKey(k); + currKind = parsedKey.n1; + + if (currKind >= 30'000) return false; + + if (parsedKey.n2 > cutoff) { + currKind++; + return false; + } + + uint64_t nodeId = lmdb::from_sv(v); + + if (nodeId != mostRecent) { // prevent nodeId re-use + expiredEvents.emplace_back(nodeId, 0); + } + + return true; + }); + + if (numRecs == 0) break; + } + } + + if (expiredEvents.size() > 0) { + LI << "Deleting " << expiredEvents.size() << " old events"; + + auto txn = env.txn_rw(); + + quadrable::Quadrable qdb; + qdb.init(txn); + qdb.checkout("events"); + + auto changes = qdb.change(); + + for (auto &e : expiredEvents) { + auto view = env.lookup_Event(txn, e.nodeId); + if (!view) throw herr("missing event from index, corrupt DB?"); + changes.del(flatEventToQuadrableKey(view->flat_nested()), &e.deletedNodeId); + } + + changes.apply(txn); + + for (auto 
&e : expiredEvents) { + if (e.deletedNodeId) env.delete_Event(txn, e.nodeId); + } + + txn.commit(); + } +} diff --git a/src/RelayIngester.cpp b/src/RelayIngester.cpp new file mode 100644 index 0000000..8b5634e --- /dev/null +++ b/src/RelayIngester.cpp @@ -0,0 +1,109 @@ +#include "RelayServer.h" + + +void RelayServer::runIngester(ThreadPool::Thread &thr) { + secp256k1_context *secpCtx = secp256k1_context_create(SECP256K1_CONTEXT_VERIFY); + + while(1) { + auto newMsgs = thr.inbox.pop_all(); + + auto txn = env.txn_ro(); + + std::vector writerMsgs; + + for (auto &newMsg : newMsgs) { + if (auto msg = std::get_if(&newMsg.msg)) { + try { + if (msg->payload.starts_with('[')) { + auto payload = tao::json::from_string(msg->payload); + + if (!payload.is_array()) throw herr("message is not an array"); + auto &arr = payload.get_array(); + if (arr.size() < 2) throw herr("bad message"); + + auto &cmd = arr[0].get_string(); + + if (cmd == "EVENT") { + try { + ingesterProcessEvent(txn, msg->connId, secpCtx, arr[1], writerMsgs); + } catch (std::exception &e) { + sendOKResponse(msg->connId, arr[1].at("id").get_string(), false, std::string("invalid: ") + e.what()); + LI << "Rejected invalid event: " << e.what(); + } + } else if (cmd == "REQ") { + try { + ingesterProcessReq(txn, msg->connId, arr); + } catch (std::exception &e) { + sendNoticeError(msg->connId, std::string("bad req: ") + e.what()); + } + } else if (cmd == "CLOSE") { + try { + ingesterProcessClose(txn, msg->connId, arr); + } catch (std::exception &e) { + sendNoticeError(msg->connId, std::string("bad close: ") + e.what()); + } + } else { + throw herr("unknown cmd"); + } + } else if (msg->payload.starts_with("Y")) { + verifyYesstrRequest(msg->payload); + + auto *req = parseYesstrRequest(msg->payload); + + if (req->payload_type() == Yesstr::RequestPayload::RequestPayload_RequestSync) { + tpYesstr.dispatch(msg->connId, MsgYesstr{MsgYesstr::SyncRequest{ msg->connId, std::move(msg->payload) }}); + } else { + throw 
herr("unrecognised yesstr request"); + } + } else { + throw herr("unparseable message"); + } + } catch (std::exception &e) { + sendNoticeError(msg->connId, std::string("bad msg: ") + e.what()); + } + } else if (auto msg = std::get_if(&newMsg.msg)) { + auto connId = msg->connId; + tpReqWorker.dispatch(connId, MsgReqWorker{MsgReqWorker::CloseConn{connId}}); + tpYesstr.dispatch(connId, MsgYesstr{MsgYesstr::CloseConn{connId}}); + } + } + + if (writerMsgs.size()) { + tpWriter.dispatchMulti(0, writerMsgs); + } + } +} + +void RelayServer::ingesterProcessEvent(lmdb::txn &txn, uint64_t connId, secp256k1_context *secpCtx, const tao::json::value &origJson, std::vector &output) { + std::string flatStr, jsonStr; + + parseAndVerifyEvent(origJson, secpCtx, true, true, flatStr, jsonStr); + + auto *flat = flatbuffers::GetRoot(flatStr.data()); + + { + auto existing = lookupEventById(txn, sv(flat->id())); + if (existing) { + LI << "Duplicate event, skipping"; + sendOKResponse(connId, to_hex(sv(flat->id())), false, "duplicate: have this event"); + return; + } + } + + output.emplace_back(MsgWriter{MsgWriter::AddEvent{connId, hoytech::curr_time_us(), std::move(flatStr), std::move(jsonStr)}}); +} + +void RelayServer::ingesterProcessReq(lmdb::txn &txn, uint64_t connId, const tao::json::value &arr) { + if (arr.get_array().size() < 2 + 1) throw herr("arr too small"); + if (arr.get_array().size() > 2 + 20) throw herr("arr too big"); + + Subscription sub(connId, arr[1].get_string(), NostrFilterGroup(arr)); + + tpReqWorker.dispatch(connId, MsgReqWorker{MsgReqWorker::NewSub{std::move(sub)}}); +} + +void RelayServer::ingesterProcessClose(lmdb::txn &txn, uint64_t connId, const tao::json::value &arr) { + if (arr.get_array().size() != 2) throw herr("arr too small/big"); + + tpReqWorker.dispatch(connId, MsgReqWorker{MsgReqWorker::RemoveSub{connId, SubId(arr[1].get_string())}}); +} diff --git a/src/RelayReqMonitor.cpp b/src/RelayReqMonitor.cpp new file mode 100644 index 0000000..717cb9c --- /dev/null 
+++ b/src/RelayReqMonitor.cpp @@ -0,0 +1,57 @@ +#include "RelayServer.h" + +#include "ActiveMonitors.h" + + + +void RelayServer::runReqMonitor(ThreadPool::Thread &thr) { + auto dbChangeWatcher = hoytech::file_change_monitor(dbDir + "/data.mdb"); + + dbChangeWatcher.setDebounce(100); + + dbChangeWatcher.run([&](){ + tpReqMonitor.dispatchToAll([]{ return MsgReqMonitor{MsgReqMonitor::DBChange{}}; }); + }); + + + ActiveMonitors monitors; + uint64_t currEventId = MAX_U64; + + while (1) { + auto newMsgs = thr.inbox.pop_all(); + + auto txn = env.txn_ro(); + + uint64_t latestEventId = getMostRecentEventId(txn); + if (currEventId > latestEventId) currEventId = latestEventId; + + for (auto &newMsg : newMsgs) { + if (auto msg = std::get_if(&newMsg.msg)) { + env.foreach_Event(txn, [&](auto &ev){ + if (msg->sub.filterGroup.doesMatch(ev.flat_nested())) { + sendEvent(msg->sub.connId, msg->sub.subId, getEventJson(txn, ev.primaryKeyId)); + } + + return true; + }, false, msg->sub.latestEventId + 1); + + msg->sub.latestEventId = latestEventId; + + monitors.addSub(txn, std::move(msg->sub), latestEventId); + } else if (auto msg = std::get_if(&newMsg.msg)) { + monitors.removeSub(msg->connId, msg->subId); + } else if (auto msg = std::get_if(&newMsg.msg)) { + monitors.closeConn(msg->connId); + } else if (std::get_if(&newMsg.msg)) { + env.foreach_Event(txn, [&](auto &ev){ + monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t quadId){ + sendEventToBatch(std::move(recipients), std::string(getEventJson(txn, quadId))); + }); + return true; + }, false, currEventId + 1); + + currEventId = latestEventId; + } + } + } +} diff --git a/src/RelayReqWorker.cpp b/src/RelayReqWorker.cpp new file mode 100644 index 0000000..6ecb92b --- /dev/null +++ b/src/RelayReqWorker.cpp @@ -0,0 +1,111 @@ +#include "RelayServer.h" +#include "DBScan.h" + + + +struct ActiveQueries : NonCopyable { + using ConnQueries = std::map; + std::map conns; // connId -> subId -> DBScanQuery* + std::deque running; + + 
void addSub(lmdb::txn &txn, Subscription &&sub) { + sub.latestEventId = getMostRecentEventId(txn); + + { + auto *existing = findQuery(sub.connId, sub.subId); + if (existing) removeSub(sub.connId, sub.subId); + } + + auto res = conns.try_emplace(sub.connId); + auto &connQueries = res.first->second; + + DBScanQuery *q = new DBScanQuery(sub); + + connQueries.try_emplace(q->sub.subId, q); + running.push_front(q); + } + + DBScanQuery *findQuery(uint64_t connId, const SubId &subId) { + auto f1 = conns.find(connId); + if (f1 == conns.end()) return nullptr; + + auto f2 = f1->second.find(subId); + if (f2 == f1->second.end()) return nullptr; + + return f2->second; + } + + void removeSub(uint64_t connId, const SubId &subId) { + auto *query = findQuery(connId, subId); + if (!query) return; + query->dead = true; + conns[connId].erase(subId); + if (conns[connId].empty()) conns.erase(connId); + } + + void closeConn(uint64_t connId) { + auto f1 = conns.find(connId); + if (f1 == conns.end()) return; + + for (auto &[k, v] : f1->second) v->dead = true; + + conns.erase(connId); + } + + void process(RelayServer *server, lmdb::txn &txn) { + if (running.empty()) return; + + DBScanQuery *q = running.front(); + running.pop_front(); + + if (q->dead) { + delete q; + return; + } + + bool complete = q->process(txn, cfg().relay__queryTimesliceBudgetMicroseconds, [&](const auto &sub, uint64_t quadId){ + server->sendEvent(sub.connId, sub.subId, getEventJson(txn, quadId)); + }); + + if (complete) { + auto connId = q->sub.connId; + + server->sendToConn(connId, tao::json::to_string(tao::json::value::array({ "EOSE", q->sub.subId.str() }))); + removeSub(connId, q->sub.subId); + + server->tpReqMonitor.dispatch(connId, MsgReqMonitor{MsgReqMonitor::NewSub{std::move(q->sub)}}); + + delete q; + } else { + running.push_back(q); + } + } +}; + + +void RelayServer::runReqWorker(ThreadPool::Thread &thr) { + ActiveQueries queries; + + while(1) { + auto newMsgs = queries.running.empty() ? 
thr.inbox.pop_all() : thr.inbox.pop_all_no_wait(); + + auto txn = env.txn_ro(); + + for (auto &newMsg : newMsgs) { + if (auto msg = std::get_if(&newMsg.msg)) { + queries.addSub(txn, std::move(msg->sub)); + queries.process(this, txn); + } else if (auto msg = std::get_if(&newMsg.msg)) { + queries.removeSub(msg->connId, msg->subId); + tpReqMonitor.dispatch(msg->connId, MsgReqMonitor{MsgReqMonitor::RemoveSub{msg->connId, msg->subId}}); + } else if (auto msg = std::get_if(&newMsg.msg)) { + queries.closeConn(msg->connId); + tpReqMonitor.dispatch(msg->connId, MsgReqMonitor{MsgReqMonitor::CloseConn{msg->connId}}); + } + } + + queries.process(this, txn); + + txn.abort(); + } +} diff --git a/src/RelayServer.h b/src/RelayServer.h new file mode 100644 index 0000000..bcad465 --- /dev/null +++ b/src/RelayServer.h @@ -0,0 +1,207 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "golpe.h" + +#include "Subscription.h" +#include "ThreadPool.h" +#include "events.h" +#include "filters.h" +#include "yesstr.h" + + + + +struct MsgWebsocket : NonCopyable { + struct Send { + uint64_t connId; + std::string payload; + }; + + struct SendBinary { + uint64_t connId; + std::string payload; + }; + + struct SendEventToBatch { + RecipientList list; + std::string evJson; + }; + + using Var = std::variant; + Var msg; + MsgWebsocket(Var &&msg_) : msg(std::move(msg_)) {} +}; + +struct MsgIngester : NonCopyable { + struct ClientMessage { + uint64_t connId; + std::string payload; + }; + + struct CloseConn { + uint64_t connId; + }; + + using Var = std::variant; + Var msg; + MsgIngester(Var &&msg_) : msg(std::move(msg_)) {} +}; + +struct MsgWriter : NonCopyable { + struct AddEvent { + uint64_t connId; + uint64_t receivedAt; + std::string flatStr; + std::string jsonStr; + }; + + using Var = std::variant; + Var msg; + MsgWriter(Var &&msg_) : msg(std::move(msg_)) {} +}; + +struct MsgReqWorker : NonCopyable { + struct NewSub { + 
Subscription sub; + }; + + struct RemoveSub { + uint64_t connId; + SubId subId; + }; + + struct CloseConn { + uint64_t connId; + }; + + using Var = std::variant; + Var msg; + MsgReqWorker(Var &&msg_) : msg(std::move(msg_)) {} +}; + +struct MsgReqMonitor : NonCopyable { + struct NewSub { + Subscription sub; + }; + + struct RemoveSub { + uint64_t connId; + SubId subId; + }; + + struct CloseConn { + uint64_t connId; + }; + + struct DBChange { + }; + + using Var = std::variant; + Var msg; + MsgReqMonitor(Var &&msg_) : msg(std::move(msg_)) {} +}; + +struct MsgYesstr : NonCopyable { + struct SyncRequest { + uint64_t connId; + std::string yesstrMessage; + }; + + struct CloseConn { + uint64_t connId; + }; + + using Var = std::variant; + Var msg; + MsgYesstr(Var &&msg_) : msg(std::move(msg_)) {} +}; + + +struct RelayServer { + std::unique_ptr hubTrigger; + + // Thread Pools + + ThreadPool tpWebsocket; + ThreadPool tpIngester; + ThreadPool tpWriter; + ThreadPool tpReqWorker; + ThreadPool tpReqMonitor; + ThreadPool tpYesstr; + hoytech::timer cron; + + void run(); + + void runWebsocket(ThreadPool::Thread &thr); + + void runIngester(ThreadPool::Thread &thr); + void ingesterProcessEvent(lmdb::txn &txn, uint64_t connId, secp256k1_context *secpCtx, const tao::json::value &origJson, std::vector &output); + void ingesterProcessReq(lmdb::txn &txn, uint64_t connId, const tao::json::value &origJson); + void ingesterProcessClose(lmdb::txn &txn, uint64_t connId, const tao::json::value &origJson); + + void runWriter(ThreadPool::Thread &thr); + + void runReqWorker(ThreadPool::Thread &thr); + + void runReqMonitor(ThreadPool::Thread &thr); + + void runYesstr(ThreadPool::Thread &thr); + + void cleanupOldEvents(); + + // Utils (can be called by any thread) + + void sendToConn(uint64_t connId, std::string &&payload) { + tpWebsocket.dispatch(0, MsgWebsocket{MsgWebsocket::Send{connId, std::move(payload)}}); + hubTrigger->send(); + } + + void sendToConn(uint64_t connId, std::string &payload) { + 
tpWebsocket.dispatch(0, MsgWebsocket{MsgWebsocket::Send{connId, std::move(payload)}}); + hubTrigger->send(); + } + + void sendToConnBinary(uint64_t connId, std::string &&payload) { + tpWebsocket.dispatch(0, MsgWebsocket{MsgWebsocket::SendBinary{connId, std::move(payload)}}); + hubTrigger->send(); + } + + void sendEvent(uint64_t connId, const SubId &subId, std::string_view evJson) { + std::string reply = std::string("[\"EVENT\",\""); + reply += subId.sv(); + reply += "\","; + reply += evJson; + reply += "]"; + sendToConn(connId, reply); + } + + void sendEventToBatch(RecipientList &&list, std::string &&evJson) { + tpWebsocket.dispatch(0, MsgWebsocket{MsgWebsocket::SendEventToBatch{std::move(list), std::move(evJson)}}); + hubTrigger->send(); + } + + void sendNoticeError(uint64_t connId, std::string &&payload) { + LI << "sending error to [" << connId << "]: " << payload; + auto reply = tao::json::value::array({ "NOTICE", std::string("ERROR: ") + payload }); + tpWebsocket.dispatch(0, MsgWebsocket{MsgWebsocket::Send{connId, std::move(tao::json::to_string(reply))}}); + hubTrigger->send(); + } + + void sendOKResponse(uint64_t connId, std::string_view eventIdHex, bool written, std::string_view message) { + auto reply = tao::json::value::array({ "OK", eventIdHex, written, message }); + tpWebsocket.dispatch(0, MsgWebsocket{MsgWebsocket::Send{connId, std::move(tao::json::to_string(reply))}}); + hubTrigger->send(); + } +}; diff --git a/src/RelayWebsocket.cpp b/src/RelayWebsocket.cpp new file mode 100644 index 0000000..e579eb6 --- /dev/null +++ b/src/RelayWebsocket.cpp @@ -0,0 +1,172 @@ +#include "RelayServer.h" + + +std::string preGenerateHttpResponse(const std::string &contentType, const std::string &content) { + std::string output = "HTTP/1.1 200 OK\r\n"; + output += std::string("Content-Type: ") + contentType + "\r\n"; + output += "Access-Control-Allow-Origin: *\r\n"; + output += "Connection: keep-alive\r\n"; + output += "Server: strfry\r\n"; + output += 
std::string("Content-Length: ") + std::to_string(content.size()) + "\r\n"; + output += "\r\n"; + output += content; + return output; +}; + + +void RelayServer::runWebsocket(ThreadPool::Thread &thr) { + struct Connection { + uWS::WebSocket *websocket; + uint64_t connId; + uint64_t connectedTimestamp; + std::string ipAddr; + + Connection(uWS::WebSocket *p, uint64_t connId_) + : websocket(p), connId(connId_), connectedTimestamp(hoytech::curr_time_us()) { } + Connection(const Connection &) = delete; + Connection(Connection &&) = delete; + }; + + uWS::Hub hub; + uWS::Group *hubGroup; + std::map connIdToConnection; + uint64_t nextConnectionId = 1; + + std::string tempBuf; + tempBuf.reserve(cfg().events__maxEventSize + MAX_SUBID_SIZE + 100); + + + + auto getServerInfoHttpResponse = [ver = uint64_t(0), rendered = std::string("")]() mutable { + if (ver != cfg().version()) { + rendered = preGenerateHttpResponse("application/json", tao::json::to_string(tao::json::value({ + { "name", cfg().relay__info__name }, + { "description", cfg().relay__info__description }, + { "pubkey", cfg().relay__info__pubkey }, + { "contact", cfg().relay__info__contact }, + { "supported_nips", tao::json::value::array({ 1, 9, 11, 12, 15, 16, 20, 22 }) }, + { "software", "git+https://github.com/hoytech/strfry.git" }, + { "version", GOLPE_GIT_VER }, + }))); + ver = cfg().version(); + } + + return std::string_view(rendered); + }; + + const std::string defaultHttpResponse = preGenerateHttpResponse("text/plain", "Please use a Nostr client to connect."); + + + + hubGroup = hub.createGroup(uWS::PERMESSAGE_DEFLATE | uWS::SLIDING_DEFLATE_WINDOW, cfg().relay__maxWebsocketPayloadSize); + + hubGroup->onHttpRequest([&](uWS::HttpResponse *res, uWS::HttpRequest req, char *data, size_t length, size_t remainingBytes){ + LI << "HTTP request for [" << req.getUrl().toString() << "]"; + + if (req.getHeader("accept").toString() == "application/nostr+json") { + auto info = getServerInfoHttpResponse(); + 
res->write(info.data(), info.size()); + } else { + res->write(defaultHttpResponse.data(), defaultHttpResponse.size()); + } + }); + + hubGroup->onConnection([&](uWS::WebSocket *ws, uWS::HttpRequest req) { + std::string addr = ws->getAddress().address; + uint64_t connId = nextConnectionId++; + LI << "[" << connId << "] Connect from " << addr; + Connection *c = new Connection(ws, connId); + c->ipAddr = addr; + ws->setUserData((void*)c); + connIdToConnection.emplace(connId, c); + + { + int optval = 1; + if (setsockopt(ws->getFd(), SOL_SOCKET, SO_KEEPALIVE, &optval, sizeof(optval))) { + LW << "Failed to enable TCP keepalive: " << strerror(errno); + } + } + }); + + hubGroup->onDisconnection([&](uWS::WebSocket *ws, int code, char *message, size_t length) { + Connection *c = (Connection*)ws->getUserData(); + uint64_t connId = c->connId; + + LI << "[" << connId << "] Disconnect from " << c->ipAddr; + + tpIngester.dispatch(connId, MsgIngester{MsgIngester::CloseConn{connId}}); + + connIdToConnection.erase(connId); + delete c; + }); + + hubGroup->onMessage2([&](uWS::WebSocket *ws, char *message, size_t length, uWS::OpCode opCode, size_t compressedSize) { + //LI << "Decompression: " << compressedSize << " -> " << length; + + auto &c = *(Connection*)ws->getUserData(); + + tpIngester.dispatch(c.connId, MsgIngester{MsgIngester::ClientMessage{c.connId, std::string(message, length)}}); + }); + + + std::function asyncCb = [&]{ + auto newMsgs = thr.inbox.pop_all_no_wait(); + + auto doSend = [&](uint64_t connId, std::string_view payload, uWS::OpCode opCode){ + auto it = connIdToConnection.find(connId); + if (it == connIdToConnection.end()) return; + auto &c = *it->second; + + size_t compressedSize; + auto cb = [](uWS::WebSocket *webSocket, void *data, bool cancelled, void *reserved){}; + c.websocket->send(payload.data(), payload.size(), opCode, cb, nullptr, true, &compressedSize); + //LI << "Compression: " << payload.size() << " -> " << compressedSize; + }; + + for (auto &newMsg : 
newMsgs) { + if (auto msg = std::get_if(&newMsg.msg)) { + doSend(msg->connId, msg->payload, uWS::OpCode::TEXT); + } else if (auto msg = std::get_if(&newMsg.msg)) { + doSend(msg->connId, msg->payload, uWS::OpCode::BINARY); + } else if (auto msg = std::get_if(&newMsg.msg)) { + for (auto &item : msg->list) { + tempBuf.clear(); + tempBuf += "[\"EVENT\",\""; + tempBuf += item.subId.sv(); + tempBuf += "\","; + tempBuf += msg->evJson; + tempBuf += "]"; + + auto it = connIdToConnection.find(item.connId); + if (it == connIdToConnection.end()) continue; + auto &c = *it->second; + + size_t compressedSize; + auto cb = [](uWS::WebSocket *webSocket, void *data, bool cancelled, void *reserved){}; + c.websocket->send(tempBuf.data(), tempBuf.size(), uWS::OpCode::TEXT, cb, nullptr, true, &compressedSize); + //LI << "Compression: " << msg->payload.size() << " -> " << compressedSize; + } + } + } + }; + + hubTrigger = std::make_unique(hub.getLoop()); + hubTrigger->setData(&asyncCb); + + hubTrigger->start([](uS::Async *a){ + auto *r = static_cast *>(a->data); + (*r)(); + }); + + + + int port = cfg().relay__port; + + std::string bindHost = cfg().relay__bind; + + if (!hub.listen(bindHost.c_str(), port, nullptr, uS::REUSE_PORT, hubGroup)) throw herr("unable to listen on port ", port); + + LI << "Started websocket server on " << bindHost << ":" << port; + + hub.run(); +} diff --git a/src/RelayWriter.cpp b/src/RelayWriter.cpp new file mode 100644 index 0000000..326525e --- /dev/null +++ b/src/RelayWriter.cpp @@ -0,0 +1,59 @@ +#include "RelayServer.h" + + +void RelayServer::runWriter(ThreadPool::Thread &thr) { + quadrable::Quadrable qdb; + { + auto txn = env.txn_ro(); + qdb.init(txn); + } + qdb.checkout("events"); + + while(1) { + auto newMsgs = thr.inbox.pop_all(); + + // Prepare messages + + std::deque newEvents; + + for (auto &newMsg : newMsgs) { + if (auto msg = std::get_if(&newMsg.msg)) { + newEvents.emplace_back(std::move(msg->flatStr), std::move(msg->jsonStr), msg->receivedAt, msg); + 
} + } + + { + auto txn = env.txn_rw(); + writeEvents(txn, qdb, newEvents); + txn.commit(); + } + + // Log + + for (auto &newEvent : newEvents) { + auto *flat = flatbuffers::GetRoot(newEvent.flatStr.data()); + auto eventIdHex = to_hex(sv(flat->id())); + std::string message; + bool written = false; + + if (newEvent.status == EventWriteStatus::Written) { + LI << "Inserted event. id=" << eventIdHex << " qdbNodeId=" << newEvent.nodeId; + written = true; + } else if (newEvent.status == EventWriteStatus::Duplicate) { + message = "duplicate: have this event"; + } else if (newEvent.status == EventWriteStatus::Replaced) { + message = "replaced: have newer event"; + } else if (newEvent.status == EventWriteStatus::Deleted) { + message = "deleted: user requested deletion"; + } + + if (newEvent.status != EventWriteStatus::Written) { + LI << "Rejected event. " << message << ", id=" << eventIdHex; + } + + MsgWriter::AddEvent *addEventMsg = static_cast(newEvent.userData); + + sendOKResponse(addEventMsg->connId, eventIdHex, written, message); + } + } +} diff --git a/src/RelayYesstr.cpp b/src/RelayYesstr.cpp new file mode 100644 index 0000000..0c7c00f --- /dev/null +++ b/src/RelayYesstr.cpp @@ -0,0 +1,146 @@ +#include +#include + +#include "RelayServer.h" +#include "DBScan.h" + + +void RelayServer::runYesstr(ThreadPool::Thread &thr) { + quadrable::Quadrable qdb; + { + auto txn = env.txn_ro(); + qdb.init(txn); + } + + + struct SyncState { + quadrable::MemStore m; + }; + + struct SyncStateCollection { + RelayServer *server; + quadrable::Quadrable *qdb; + std::map> conns; // connId -> reqId -> SyncState + + SyncStateCollection(RelayServer *server_, quadrable::Quadrable *qdb_) : server(server_), qdb(qdb_) {} + + SyncState *lookup(uint64_t connId, uint64_t reqId) { + if (!conns.contains(connId)) return nullptr; + if (!conns[connId].contains(reqId)) return nullptr; + return &conns[connId][reqId]; + } + + SyncState *newRequest(lmdb::txn &txn, uint64_t connId, uint64_t reqId, 
std::string_view filterStr) { + if (!conns.contains(connId)) conns.try_emplace(connId); + if (conns[connId].contains(reqId)) { + LI << "Client tried to re-use reqId for new filter, ignoring"; + return &conns[connId][reqId]; + } + conns[connId].try_emplace(reqId); + auto &s = conns[connId][reqId]; + + if (filterStr == "{}") { + qdb->checkout("events"); + uint64_t nodeId = qdb->getHeadNodeId(txn); + + qdb->withMemStore(s.m, [&]{ + qdb->writeToMemStore = true; + qdb->checkout(nodeId); + }); + } else { + // FIXME: The following blocks the whole thread for the query duration. Should interleave it + // with other requests like RelayReqWorker does. + + LI << "Yesstr sync: Running filter: " << filterStr; + + std::vector quadEventIds; + auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr)); + Subscription sub(1, "junkSub", filterGroup); + DBScanQuery query(sub); + + while (1) { + bool complete = query.process(txn, MAX_U64, [&](const auto &sub, uint64_t quadId){ + quadEventIds.push_back(quadId); + }); + + if (complete) break; + } + + LI << "Filter matched " << quadEventIds.size() << " local events"; + + qdb->withMemStore(s.m, [&]{ + qdb->writeToMemStore = true; + qdb->checkout(); + + auto changes = qdb->change(); + + for (auto id : quadEventIds) { + changes.putReuse(txn, id); + } + + changes.apply(txn); + }); + } + + return &s; + } + + + void handleRequest(lmdb::txn &txn, uint64_t connId, uint64_t reqId, std::string_view filterStr, std::string_view reqsEncoded) { + SyncState *s = lookup(connId, reqId); + + if (!s) s = newRequest(txn, connId, reqId, filterStr); + + auto reqs = quadrable::transport::decodeSyncRequests(reqsEncoded); + + quadrable::SyncResponses resps; + + qdb->withMemStore(s->m, [&]{ + qdb->writeToMemStore = true; + LI << "ZZZ NODE " << qdb->getHeadNodeId(txn); + resps = qdb->handleSyncRequests(txn, qdb->getHeadNodeId(txn), reqs, 100'000); + }); + + std::string respsEncoded = quadrable::transport::encodeSyncResponses(resps); + + 
flatbuffers::FlatBufferBuilder builder; + + auto respOffset = Yesstr::CreateResponse(builder, + reqId, + Yesstr::ResponsePayload::ResponsePayload_ResponseSync, + Yesstr::CreateResponseSync(builder, + builder.CreateVector((uint8_t*)respsEncoded.data(), respsEncoded.size()) + ).Union() + ); + + builder.Finish(respOffset); + + std::string respMsg = std::string("Y") + std::string(reinterpret_cast(builder.GetBufferPointer()), builder.GetSize()); + server->sendToConnBinary(connId, std::move(respMsg)); + } + + void closeConn(uint64_t connId) { + conns.erase(connId); + } + }; + + SyncStateCollection states(this, &qdb); + + + while(1) { + auto newMsgs = thr.inbox.pop_all(); + + auto txn = env.txn_ro(); + + for (auto &newMsg : newMsgs) { + if (auto msg = std::get_if(&newMsg.msg)) { + const auto *req = parseYesstrRequest(msg->yesstrMessage); // validated by ingester + const auto *reqSync = req->payload_as(); + + states.handleRequest(txn, msg->connId, req->requestId(), sv(reqSync->filter()), sv(reqSync->reqsEncoded())); + } else if (auto msg = std::get_if(&newMsg.msg)) { + states.closeConn(msg->connId); + } + } + } +} diff --git a/src/Subscription.h b/src/Subscription.h new file mode 100644 index 0000000..02851bb --- /dev/null +++ b/src/Subscription.h @@ -0,0 +1,58 @@ +#pragma once + +#include "filters.h" + + +struct SubId { + char buf[40]; + + SubId(std::string_view val) { + static_assert(MAX_SUBID_SIZE == 39, "MAX_SUBID_SIZE mismatch"); + if (val.size() > 39) throw herr("subscription id too long"); + if (val.size() == 0) throw herr("subscription id too short"); + + auto badChar = [](char c){ + return c < 0x20 || c == '\\' || c == '"' || c >= 0x7F; + }; + + if (std::any_of(val.begin(), val.end(), badChar)) throw herr("invalid character in subscription id"); + + buf[0] = (char)val.size(); + memcpy(&buf[1], val.data(), val.size()); + } + + std::string_view sv() const { + return std::string_view(&buf[1], (size_t)buf[0]); + } + + std::string str() const { + return 
std::string(sv()); + } +}; + +inline bool operator <(const SubId &s1, const SubId &s2) { + return s1.sv() < s2.sv(); +} + + +struct Subscription : NonCopyable { + Subscription(uint64_t connId_, std::string subId_, NostrFilterGroup filterGroup_) : connId(connId_), subId(subId_), filterGroup(filterGroup_) {} + + // Params + + uint64_t connId; + SubId subId; + NostrFilterGroup filterGroup; + + // State + + uint64_t latestEventId = MAX_U64; +}; + + +struct ConnIdSubId { + uint64_t connId; + SubId subId; +}; + +using RecipientList = std::vector; diff --git a/src/ThreadPool.h b/src/ThreadPool.h new file mode 100644 index 0000000..573ce92 --- /dev/null +++ b/src/ThreadPool.h @@ -0,0 +1,61 @@ +#pragma once + +#include + + +template +struct ThreadPool { + uint64_t numThreads; + + struct Thread { + uint64_t id; + std::thread thread; + hoytech::protected_queue inbox; + }; + + std::deque pool; + + ~ThreadPool() { + join(); + } + + void init(std::string name, uint64_t numThreads_, std::function cb) { + if (numThreads_ == 0) throw herr("must have more than 0 threads"); + + numThreads = numThreads_; + + for (size_t i = 0; i < numThreads; i++) { + std::string myName = name; + if (numThreads != 1) myName += std::string(" ") + std::to_string(i); + + pool.emplace_back(); + auto &t = pool.back(); + + t.id = i; + t.thread = std::thread([&t, cb, myName]() { + setThreadName(myName.c_str()); + cb(t); + }); + } + } + + void dispatch(uint64_t key, M &&m) { + uint64_t who = key % numThreads; + pool[who].inbox.push_move(std::move(m)); + } + + void dispatchMulti(uint64_t key, std::vector &m) { + uint64_t who = key % numThreads; + pool[who].inbox.push_move_all(m); + } + + void dispatchToAll(std::function cb) { + for (size_t i = 0; i < numThreads; i++) pool[i].inbox.push_move(cb()); + } + + void join() { + for (size_t i = 0; i < numThreads; i++) { + pool[i].thread.join(); + } + } +}; diff --git a/src/WSConnection.h b/src/WSConnection.h new file mode 100644 index 0000000..ef596a0 --- /dev/null 
+++ b/src/WSConnection.h @@ -0,0 +1,133 @@ +#include +#include + +#include + +#include "golpe.h" + + +class WSConnection { + std::string url; + + uWS::Hub hub; + uWS::Group *hubGroup; + std::unique_ptr hubTrigger; + + uWS::WebSocket *currWs = nullptr; + + + public: + + WSConnection(const std::string &url) : url(url) {} + + std::function onConnect; + std::function onMessage; + std::function onTrigger; + bool reconnect = true; + uint64_t reconnectDelayMilliseconds = 5'000; + + // Should only be called from the websocket thread (ie within an onConnect or onMessage callback) + void send(std::string_view msg, uWS::OpCode op = uWS::OpCode::TEXT, size_t *compressedSize = nullptr) { + if (currWs) { + currWs->send(msg.data(), msg.size(), op, nullptr, nullptr, true, compressedSize); + } else { + LI << "Tried to send message, but websocket is disconnected"; + } + } + + // Can be called from any thread, invokes onTrigger in websocket thread context + void trigger() { + if (hubTrigger) hubTrigger->send(); + } + + void run() { + hubGroup = hub.createGroup(uWS::PERMESSAGE_DEFLATE | uWS::SLIDING_DEFLATE_WINDOW); + + + auto doConnect = [&](uint64_t delay = 0){ + if (delay) std::this_thread::sleep_for(std::chrono::milliseconds(delay)); + LI << "Attempting to connect to " << url; + hub.connect(url, nullptr, {}, 5000, hubGroup); + }; + + + hubGroup->onConnection([&](uWS::WebSocket *ws, uWS::HttpRequest req) { + if (currWs) { + currWs->terminate(); + currWs = nullptr; + } + + std::string addr = ws->getAddress().address; + LI << "Connected to " << addr; + + { + int optval = 1; + if (setsockopt(ws->getFd(), SOL_SOCKET, SO_KEEPALIVE, &optval, sizeof(optval))) { + LW << "Failed to enable TCP keepalive: " << strerror(errno); + } + } + + currWs = ws; + + if (!onConnect) return; + try { + onConnect(); + } catch (std::exception &e) { + LW << "onConnect failure: " << e.what(); + } + }); + + hubGroup->onDisconnection([&](uWS::WebSocket *ws, int code, char *message, size_t length) { + LI << 
"Disconnected"; + + if (ws == currWs) { + currWs = nullptr; + + if (!reconnect) ::exit(1); + doConnect(reconnectDelayMilliseconds); + } else { + LI << "Got disconnect for unexpected connection?"; + } + }); + + hubGroup->onError([&](void *) { + LI << "Websocket connection error"; + + if (!reconnect) ::exit(1); + doConnect(reconnectDelayMilliseconds); + }); + + hubGroup->onMessage2([&](uWS::WebSocket *ws, char *message, size_t length, uWS::OpCode opCode, size_t compressedSize) { + if (!onMessage) return; + + try { + onMessage(std::string_view(message, length), compressedSize); + } catch (std::exception &e) { + LW << "onMessage failure: " << e.what(); + } + }); + + std::function asyncCb = [&]{ + if (!onTrigger) return; + + try { + onTrigger(); + } catch (std::exception &e) { + LW << "onTrigger failure: " << e.what(); + } + }; + + hubTrigger = std::make_unique(hub.getLoop()); + hubTrigger->setData(&asyncCb); + + hubTrigger->start([](uS::Async *a){ + auto *r = static_cast *>(a->data); + (*r)(); + }); + + + doConnect(); + + hub.run(); + } +}; diff --git a/src/WriterPipeline.h b/src/WriterPipeline.h new file mode 100644 index 0000000..21d1851 --- /dev/null +++ b/src/WriterPipeline.h @@ -0,0 +1,126 @@ +#pragma once + +#include + +#include "golpe.h" + +#include "events.h" + + +struct WriterPipeline { + public: + hoytech::protected_queue inbox; + hoytech::protected_queue flushInbox; + + private: + hoytech::protected_queue writerInbox; + std::thread validatorThread; + std::thread writerThread; + + public: + WriterPipeline() { + validatorThread = std::thread([&]() { + setThreadName("Validator"); + + secp256k1_context *secpCtx = secp256k1_context_create(SECP256K1_CONTEXT_VERIFY); + + while (1) { + auto msgs = inbox.pop_all(); + + for (auto &m : msgs) { + if (m.is_null()) { + writerInbox.push_move({ "", "", 0 }); + break; + } + + std::string flatStr; + std::string jsonStr; + + try { + parseAndVerifyEvent(m, secpCtx, true, true, flatStr, jsonStr); + } catch (std::exception &e) { 
+ LW << "Rejected event: " << m << " reason: " << e.what(); + continue; + } + + writerInbox.push_move({ std::move(flatStr), std::move(jsonStr), hoytech::curr_time_us() }); + } + } + }); + + writerThread = std::thread([&]() { + setThreadName("Writer"); + + quadrable::Quadrable qdb; + { + auto txn = env.txn_ro(); + qdb.init(txn); + } + qdb.checkout("events"); + + while (1) { + // Debounce + writerInbox.wait(); + std::this_thread::sleep_for(std::chrono::milliseconds(1'000)); + auto newEvents = writerInbox.pop_all(); + + bool flush = false; + uint64_t written = 0, dups = 0; + + // Collect a certain amount of records in a batch, push the rest back into the inbox + // Pre-filter out dups in a read-only txn as an optimisation + + std::deque newEventsToProc; + + { + auto txn = env.txn_ro(); + + for (auto &event : newEvents) { + if (newEventsToProc.size() > 1'000) { + // Put the rest back in the inbox + writerInbox.unshift_move_all(newEvents); + newEvents.clear(); + break; + } + + if (event.flatStr.size() == 0) { + flush = true; + break; + } + + auto *flat = flatStrToFlatEvent(event.flatStr); + if (lookupEventById(txn, sv(flat->id()))) { + dups++; + continue; + } + + newEventsToProc.emplace_back(std::move(event)); + } + } + + if (newEventsToProc.size()) { + { + auto txn = env.txn_rw(); + writeEvents(txn, qdb, newEventsToProc); + txn.commit(); + } + + for (auto &ev : newEventsToProc) { + if (ev.status == EventWriteStatus::Written) written++; + else dups++; + // FIXME: log rejected stats too + } + } + + LI << "Writer: added: " << written << " dups: " << dups; + + if (flush) flushInbox.push_move(true); + } + }); + } + + void flush() { + inbox.push_move(tao::json::null); + flushInbox.wait(); + } +}; diff --git a/src/cmd_export.cpp b/src/cmd_export.cpp new file mode 100644 index 0000000..5228d41 --- /dev/null +++ b/src/cmd_export.cpp @@ -0,0 +1,39 @@ +#include + +#include +#include "golpe.h" + +#include "events.h" + + +static const char USAGE[] = +R"( + Usage: + export 
[--since=] [--until=] [--include-ephemeral] +)"; + + +void cmd_export(const std::vector &subArgs) { + std::map args = docopt::docopt(USAGE, subArgs, true, ""); + + uint64_t since = 0, until = MAX_U64; + if (args["--since"]) since = args["--since"].asLong(); + if (args["--until"]) until = args["--until"].asLong(); + + auto txn = env.txn_ro(); + + env.generic_foreachFull(txn, env.dbi_Event__created_at, lmdb::to_sv(since), lmdb::to_sv(0), [&](auto k, auto v) { + if (lmdb::from_sv(k) > until) return false; + + auto view = env.lookup_Event(txn, lmdb::from_sv(v)); + if (!view) throw herr("missing event from index, corrupt DB?"); + + if (!args["--include-ephemeral"].asBool()) { + if (isEphemeralEvent(view->flat_nested()->kind())) return true; + } + + std::cout << getEventJson(txn, view->primaryKeyId) << "\n"; + + return true; + }); +} diff --git a/src/cmd_import.cpp b/src/cmd_import.cpp new file mode 100644 index 0000000..3a84636 --- /dev/null +++ b/src/cmd_import.cpp @@ -0,0 +1,95 @@ +#include + +#include +#include "golpe.h" + +#include "events.h" +#include "filters.h" + + +static const char USAGE[] = +R"( + Usage: + import [--show-rejected] [--no-verify] +)"; + + +void cmd_import(const std::vector &subArgs) { + std::map args = docopt::docopt(USAGE, subArgs, true, ""); + + bool showRejected = args["--show-rejected"].asBool(); + bool noVerify = args["--no-verify"].asBool(); + + if (noVerify) LW << "not verifying event IDs or signatures!"; + + quadrable::Quadrable qdb; + { + auto txn = env.txn_ro(); + qdb.init(txn); + } + qdb.checkout("events"); + + auto txn = env.txn_rw(); + + secp256k1_context *secpCtx = secp256k1_context_create(SECP256K1_CONTEXT_VERIFY); + + std::string line; + uint64_t processed = 0, added = 0, rejected = 0, dups = 0; + std::deque newEvents; + + auto logStatus = [&]{ + LI << "Processed " << processed << " lines. 
" << added << " added, " << rejected << " rejected, " << dups << " dups"; + }; + + auto flushChanges = [&]{ + writeEvents(txn, qdb, newEvents); + + uint64_t numCommits = 0; + + for (auto &newEvent : newEvents) { + if (newEvent.status == EventWriteStatus::Written) { + added++; + numCommits++; + } else if (newEvent.status == EventWriteStatus::Duplicate) { + dups++; + } else { + rejected++; + } + } + + logStatus(); + LI << "Committing " << numCommits << " records"; + txn.commit(); + + txn = env.txn_rw(); + newEvents.clear(); + }; + + + while (std::cin) { + std::getline(std::cin, line); + if (!line.size()) continue; + + processed++; + + std::string flatStr; + std::string jsonStr; + + try { + auto origJson = tao::json::from_string(line); + parseAndVerifyEvent(origJson, secpCtx, !noVerify, false, flatStr, jsonStr); + } catch (std::exception &e) { + if (showRejected) LW << "Line " << processed << " rejected: " << e.what(); + rejected++; + continue; + } + + newEvents.emplace_back(std::move(flatStr), std::move(jsonStr), hoytech::curr_time_us()); + + if (newEvents.size() >= 10'000) flushChanges(); + } + + flushChanges(); + + txn.commit(); +} diff --git a/src/cmd_info.cpp b/src/cmd_info.cpp new file mode 100644 index 0000000..835e22b --- /dev/null +++ b/src/cmd_info.cpp @@ -0,0 +1,27 @@ +#include + +#include +#include "golpe.h" + + +static const char USAGE[] = +R"( + Usage: + info +)"; + + +void cmd_info(const std::vector &subArgs) { + std::map args = docopt::docopt(USAGE, subArgs, true, ""); + + quadrable::Quadrable qdb; + { + auto txn = env.txn_ro(); + qdb.init(txn); + } + qdb.checkout("events"); + + auto txn = env.txn_ro(); + + std::cout << "merkle root: " << to_hex(qdb.root(txn)) << "\n"; +} diff --git a/src/cmd_monitor.cpp b/src/cmd_monitor.cpp new file mode 100644 index 0000000..03d3c00 --- /dev/null +++ b/src/cmd_monitor.cpp @@ -0,0 +1,66 @@ +#include + +#include +#include "golpe.h" + +#include "ActiveMonitors.h" +#include "events.h" + + +static const char USAGE[] = 
+R"( + Usage: + monitor +)"; + + +// echo '["sub",1,"mysub",{"authors":["47f7163b"]}]' | ./strfry monitor + +void cmd_monitor(const std::vector &subArgs) { + std::map args = docopt::docopt(USAGE, subArgs, true, ""); + + auto txn = env.txn_ro(); + + ActiveMonitors monitors; + + std::string line; + uint64_t interestConnId = 0; + std::string interestSubId; + + while (std::cin) { + std::getline(std::cin, line); + if (!line.size()) continue; + + auto msg = tao::json::from_string(line); + auto &msgArr = msg.get_array(); + + auto cmd = msgArr.at(0).get_string(); + + if (cmd == "sub") { + Subscription sub(msgArr.at(1).get_unsigned(), msgArr.at(2).get_string(), NostrFilterGroup::unwrapped(msgArr.at(3))); + sub.latestEventId = 0; + monitors.addSub(txn, std::move(sub), 0); + } else if (cmd == "removeSub") { + monitors.removeSub(msgArr.at(1).get_unsigned(), SubId(msgArr.at(2).get_string())); + } else if (cmd == "closeConn") { + monitors.closeConn(msgArr.at(1).get_unsigned()); + } else if (cmd == "interest") { + if (interestConnId) throw herr("interest already set"); + interestConnId = msgArr.at(1).get_unsigned(); + interestSubId = msgArr.at(2).get_string(); + } else { + throw herr("unknown cmd"); + } + } + + env.foreach_Event(txn, [&](auto &ev){ + monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t quadId){ + for (auto &r : recipients) { + if (r.connId == interestConnId && r.subId.str() == interestSubId) { + std::cout << getEventJson(txn, quadId) << "\n"; + } + } + }); + return true; + }); +} diff --git a/src/cmd_relay.cpp b/src/cmd_relay.cpp new file mode 100644 index 0000000..bc2f0d0 --- /dev/null +++ b/src/cmd_relay.cpp @@ -0,0 +1,56 @@ +#include "RelayServer.h" + + + +void cmd_relay(const std::vector &subArgs) { + RelayServer s; + s.run(); +} + +void RelayServer::run() { + tpWebsocket.init("Websocket", 1, [this](auto &thr){ + runWebsocket(thr); + }); + + tpIngester.init("Ingester", cfg().relay__numThreads__ingester, [this](auto &thr){ + runIngester(thr); + 
}); + + tpWriter.init("Writer", 1, [this](auto &thr){ + runWriter(thr); + }); + + tpReqWorker.init("ReqWorker", cfg().relay__numThreads__reqWorker, [this](auto &thr){ + runReqWorker(thr); + }); + + tpReqMonitor.init("ReqMonitor", cfg().relay__numThreads__reqMonitor, [this](auto &thr){ + runReqMonitor(thr); + }); + + tpYesstr.init("Yesstr", cfg().relay__numThreads__yesstr, [this](auto &thr){ + runYesstr(thr); + }); + + // Monitor for config file reloads + + auto configFileChangeWatcher = hoytech::file_change_monitor(configFile); + + configFileChangeWatcher.setDebounce(100); + + configFileChangeWatcher.run([&](){ + loadConfig(configFile); + }); + + // Cron + + cron.repeat(10 * 1'000'000UL, [&]{ + cleanupOldEvents(); + }); + + cron.setupCb = []{ setThreadName("cron"); }; + + cron.run(); + + tpWebsocket.join(); +} diff --git a/src/cmd_scan.cpp b/src/cmd_scan.cpp new file mode 100644 index 0000000..1f3a0d0 --- /dev/null +++ b/src/cmd_scan.cpp @@ -0,0 +1,41 @@ +#include + +#include +#include "golpe.h" + +#include "DBScan.h" +#include "events.h" + + +static const char USAGE[] = +R"( + Usage: + scan [--pause=] +)"; + + +void cmd_scan(const std::vector &subArgs) { + std::map args = docopt::docopt(USAGE, subArgs, true, ""); + + uint64_t pause = 0; + if (args["--pause"]) pause = args["--pause"].asLong(); + + + std::string filterStr = args[""].asString(); + auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr)); + + Subscription sub(1, "junkSub", filterGroup); + + DBScanQuery query(sub); + + + auto txn = env.txn_ro(); + + while (1) { + bool complete = query.process(txn, pause ? 
pause : MAX_U64, [&](const auto &sub, uint64_t quadId){ + std::cout << getEventJson(txn, quadId) << "\n"; + }); + + if (complete) break; + } +} diff --git a/src/cmd_stream.cpp b/src/cmd_stream.cpp new file mode 100644 index 0000000..eb51bdf --- /dev/null +++ b/src/cmd_stream.cpp @@ -0,0 +1,124 @@ +#include +#include +#include +#include + +#include "golpe.h" + +#include "WriterPipeline.h" +#include "Subscription.h" +#include "WSConnection.h" +#include "events.h" + + +static const char USAGE[] = +R"( + Usage: + stream [--dir=] + + Options: + --dir= Direction: down, up, or both [default: down] +)"; + + + +void cmd_stream(const std::vector &subArgs) { + std::map args = docopt::docopt(USAGE, subArgs, true, ""); + + std::string url = args[""].asString(); + std::string dir = args["--dir"] ? args["--dir"].asString() : "down"; + + if (dir != "up" && dir != "down" && dir != "both") throw herr("invalid direction: ", dir, ". Should be one of up/down/both"); + + + std::unordered_set downloadedIds; + WriterPipeline writer; + WSConnection ws(url); + + ws.onConnect = [&]{ + if (dir == "down" || dir == "both") { + auto encoded = tao::json::to_string(tao::json::value::array({ "REQ", "sub", tao::json::value({ { "limit", 0 } }) })); + ws.send(encoded); + } + }; + + ws.onMessage = [&](auto msg, size_t){ + auto origJson = tao::json::from_string(msg); + + if (origJson.is_array()) { + if (origJson.get_array().size() < 2) throw herr("array too short"); + + auto &msgType = origJson.get_array().at(0); + if (msgType == "EOSE") { + return; + } else if (msgType == "NOTICE") { + LW << "NOTICE message: " << tao::json::to_string(origJson); + return; + } else if (msgType == "OK") { + if (!origJson.get_array().at(2).get_boolean()) { + LW << "Event not written: " << origJson; + } + } else if (msgType == "EVENT") { + if (dir == "down" || dir == "both") { + if (origJson.get_array().size() < 3) throw herr("array too short"); + auto &evJson = origJson.at(2); + 
downloadedIds.emplace(from_hex(evJson.at("id").get_string())); + writer.inbox.push_move(std::move(evJson)); + } else { + LW << "Unexpected EVENT"; + } + } else { + throw herr("unexpected first element"); + } + } else { + throw herr("unexpected message"); + } + }; + + + uint64_t currEventId; + + { + auto txn = env.txn_ro(); + currEventId = getMostRecentEventId(txn); + } + + ws.onTrigger = [&]{ + if (dir == "down") return; + + auto txn = env.txn_ro(); + + env.foreach_Event(txn, [&](auto &ev){ + currEventId = ev.primaryKeyId; + + auto id = std::string(sv(ev.flat_nested()->id())); + if (downloadedIds.find(id) != downloadedIds.end()) { + downloadedIds.erase(id); + return true; + } + + std::string msg = std::string("[\"EVENT\","); + msg += getEventJson(txn, ev.primaryKeyId); + msg += "]"; + + ws.send(msg); + + return true; + }, false, currEventId + 1); + }; + + std::unique_ptr dbChangeWatcher; + + if (dir == "up" || dir == "both") { + dbChangeWatcher = std::make_unique(dbDir + "/data.mdb"); + + dbChangeWatcher->setDebounce(100); + + dbChangeWatcher->run([&](){ + ws.trigger(); + }); + } + + + ws.run(); +} diff --git a/src/cmd_sync.cpp b/src/cmd_sync.cpp new file mode 100644 index 0000000..5a1d92e --- /dev/null +++ b/src/cmd_sync.cpp @@ -0,0 +1,229 @@ +#include +#include + +#include +#include + +#include "golpe.h" + +#include "WriterPipeline.h" +#include "Subscription.h" +#include "WSConnection.h" +#include "DBScan.h" +#include "filters.h" +#include "events.h" +#include "yesstr.h" + + +static const char USAGE[] = +R"( + Usage: + sync [--filter=] [--dir=] + + Options: + --filter= Nostr filter (either single filter object or array of filters) + --dir= Direction: down, up, or both [default: down] +)"; + + +struct SyncController { + quadrable::Quadrable *qdb; + WSConnection *ws; + + quadrable::Quadrable::Sync sync; + quadrable::MemStore m; + + uint64_t ourNodeId = 0; + quadrable::SyncRequests reqs; + bool sentFirstReq = false; + + SyncController(quadrable::Quadrable *qdb_, 
WSConnection *ws_) : qdb(qdb_), ws(ws_), sync(qdb_) { } + + void init(lmdb::txn &txn) { + qdb->withMemStore(m, [&]{ + qdb->writeToMemStore = true; + ourNodeId = qdb->getHeadNodeId(txn); + sync.init(txn, ourNodeId); + }); + } + + bool sendRequests(lmdb::txn &txn, const std::string &filterStr) { + qdb->withMemStore(m, [&]{ + qdb->writeToMemStore = true; + reqs = sync.getReqs(txn, 10'000); + }); + + if (reqs.size() == 0) return false; + + std::string reqsEncoded = quadrable::transport::encodeSyncRequests(reqs); + + flatbuffers::FlatBufferBuilder builder; + + auto reqOffset = Yesstr::CreateRequest(builder, + 123, + Yesstr::RequestPayload::RequestPayload_RequestSync, + Yesstr::CreateRequestSync(builder, + (filterStr.size() && !sentFirstReq) ? builder.CreateString(filterStr) : 0, + builder.CreateVector((uint8_t*)reqsEncoded.data(), reqsEncoded.size()) + ).Union() + ); + + builder.Finish(reqOffset); + + std::string reqMsg = std::string("Y") + std::string(reinterpret_cast(builder.GetBufferPointer()), builder.GetSize()); + size_t compressedSize; + ws->send(reqMsg, uWS::OpCode::BINARY, &compressedSize); + LI << "SEND size=" << reqMsg.size() << " compressed=" << compressedSize; + + sentFirstReq = true; + + return true; + } + + void handleResponses(lmdb::txn &txn, std::string_view msg) { + verifyYesstrResponse(msg); + const auto *resp = parseYesstrResponse(msg); + const auto *respSync = resp->payload_as_ResponseSync(); + + auto resps = quadrable::transport::decodeSyncResponses(sv(respSync->respsEncoded())); + + qdb->withMemStore(m, [&]{ + qdb->writeToMemStore = true; + sync.addResps(txn, reqs, resps); + }); + } + + void finish(lmdb::txn &txn, std::function onNewLeaf, std::function onMissingLeaf) { + qdb->withMemStore(m, [&]{ + qdb->writeToMemStore = true; + + sync.diff(txn, ourNodeId, sync.nodeIdShadow, [&](auto dt, const auto &node){ + if (dt == quadrable::Quadrable::DiffType::Added) { + // node exists only on the provider-side + LI << "NEW LEAF: " << node.leafVal(); + 
onNewLeaf(node.leafVal()); + } else if (dt == quadrable::Quadrable::DiffType::Deleted) { + // node exists only on the syncer-side + LI << "MISSING LEAF: " << node.leafVal(); + onMissingLeaf(node.leafVal()); + } else if (dt == quadrable::Quadrable::DiffType::Changed) { + // nodes differ. node is the one on the provider-side + } + }); + }); + } +}; + + + +void cmd_sync(const std::vector &subArgs) { + std::map args = docopt::docopt(USAGE, subArgs, true, ""); + + std::string url = args[""].asString(); + std::string filterStr; + if (args["--filter"]) filterStr = args["--filter"].asString(); + std::string dir = args["--dir"] ? args["--dir"].asString() : "down"; + if (dir != "up" && dir != "down" && dir != "both") throw herr("invalid direction: ", dir, ". Should be one of up/down/both"); + if (dir != "down") throw herr("only down currently supported"); // FIXME + + + std::unique_ptr controller; + WriterPipeline writer; + WSConnection ws(url); + + quadrable::Quadrable qdb; + { + auto txn = env.txn_ro(); + qdb.init(txn); + } + qdb.checkout("events"); + + + + ws.reconnect = false; + + + + if (filterStr.size()) { + std::vector quadEventIds; + + std::string filterStr = args["--filter"].asString(); + auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr)); + + Subscription sub(1, "junkSub", filterGroup); + + DBScanQuery query(sub); + auto txn = env.txn_ro(); + + while (1) { + bool complete = query.process(txn, MAX_U64, [&](const auto &sub, uint64_t quadId){ + quadEventIds.push_back(quadId); + }); + + if (complete) break; + } + + LI << "Filter matched " << quadEventIds.size() << " local events"; + + controller = std::make_unique(&qdb, &ws); + + qdb.withMemStore(controller->m, [&]{ + qdb.writeToMemStore = true; + qdb.checkout(); + + auto changes = qdb.change(); + + for (auto id : quadEventIds) { + changes.putReuse(txn, id); + } + + changes.apply(txn); + }); + + controller->init(txn); + } else { + auto txn = env.txn_ro(); + + controller = 
std::make_unique(&qdb, &ws); + controller->init(txn); + } + + + + ws.onConnect = [&]{ + auto txn = env.txn_ro(); + + controller->sendRequests(txn, filterStr); + }; + + ws.onMessage = [&](auto msg, size_t compressedSize){ + auto txn = env.txn_ro(); + + if (!controller) { + LW << "No sync active, ignoring message"; + return; + } + + LI << "RECV size=" << msg.size() << " compressed=" << compressedSize; + controller->handleResponses(txn, msg); + + if (!controller->sendRequests(txn, filterStr)) { + LI << "Syncing done, writing/sending events"; + controller->finish(txn, + [&](std::string_view newLeaf){ + writer.inbox.push_move(tao::json::from_string(std::string(newLeaf))); + }, + [&](std::string_view){ + } + ); + + writer.flush(); + ::exit(0); + } + }; + + + + + ws.run(); +} diff --git a/src/constants.h b/src/constants.h new file mode 100644 index 0000000..9d27dc3 --- /dev/null +++ b/src/constants.h @@ -0,0 +1,3 @@ +#pragma once + +const size_t MAX_SUBID_SIZE = 39; diff --git a/src/events.cpp b/src/events.cpp new file mode 100644 index 0000000..4592aaf --- /dev/null +++ b/src/events.cpp @@ -0,0 +1,234 @@ +#include "events.h" + + + +std::string nostrJsonToFlat(const tao::json::value &v) { + flatbuffers::FlatBufferBuilder builder; // FIXME: pre-allocate size approximately the same as orig JSON? 
+ + // Extract values from JSON, add strings to builder + + auto loadHexStr = [&](std::string_view k, uint64_t size){ + auto s = from_hex(v.at(k).get_string(), false); + if (s.size() != size) throw herr("unexpected size of hex data"); + return builder.CreateVector((uint8_t*)s.data(), s.size()); + }; + + auto idPtr = loadHexStr("id", 32); + auto pubkeyPtr = loadHexStr("pubkey", 32); + uint64_t created_at = v.at("created_at").get_unsigned(); + uint64_t kind = v.at("kind").get_unsigned(); + + std::vector> tagPtrs; + if (v.at("tags").get_array().size() > cfg().events__maxNumTags) throw herr("too many tags: ", v.at("tags").get_array().size()); + for (auto &tagArr : v.at("tags").get_array()) { + auto &tag = tagArr.get_array(); + if (tag.size() < 2) throw herr("too few fields in tag"); + + auto tagName = tag.at(0).get_string(); + if (tagName.size() != 1) continue; // only single-char tags need indexing + + auto tagVal = tag.at(1).get_string(); + if (tagVal.size() < 1 || tagVal.size() > cfg().events__maxTagValSize) throw herr("tag val too small/large: ", tagVal.size()); + if (tagName == "e" || tagName == "p") { + tagVal = from_hex(tagVal, false); + if (tagVal.size() != 32) throw herr("unexpected size for e/p tag"); + } + auto tagValPtr = builder.CreateVector((uint8_t*)tagVal.data(), tagVal.size()); + + tagPtrs.push_back(NostrIndex::CreateTag(builder, (uint8_t)tagName[0], tagValPtr)); + } + auto tagsPtr = builder.CreateVector>(tagPtrs); + + // Create flatbuffer + + auto eventPtr = NostrIndex::CreateEvent(builder, idPtr, pubkeyPtr, created_at, kind, tagsPtr); + + builder.Finish(eventPtr); + + return std::string(reinterpret_cast(builder.GetBufferPointer()), builder.GetSize()); +} + +std::string nostrHash(const tao::json::value &origJson) { + tao::json::value arr = tao::json::empty_array; + + arr.emplace_back(0); + + arr.emplace_back(origJson.at("pubkey")); + arr.emplace_back(origJson.at("created_at")); + arr.emplace_back(origJson.at("kind")); + 
arr.emplace_back(origJson.at("tags")); + arr.emplace_back(origJson.at("content")); + + std::string encoded = tao::json::to_string(arr); + + unsigned char hash[SHA256_DIGEST_LENGTH]; + SHA256_CTX sha256; + SHA256_Init(&sha256); + SHA256_Update(&sha256, encoded.data(), encoded.size()); + SHA256_Final(hash, &sha256); + + return std::string(((char*)hash), SHA256_DIGEST_LENGTH); +} + +bool verifySig(secp256k1_context* ctx, std::string_view sig, std::string_view hash, std::string_view pubkey) { + if (sig.size() != 64 || hash.size() != 32 || pubkey.size() != 32) throw herr("verify sig: bad input size"); + + secp256k1_xonly_pubkey pubkeyParsed; + if (!secp256k1_xonly_pubkey_parse(ctx, &pubkeyParsed, (const uint8_t*)pubkey.data())) throw herr("verify sig: bad pubkey"); + + return secp256k1_schnorrsig_verify(ctx, (const uint8_t*)sig.data(), (const uint8_t*)hash.data(), &pubkeyParsed); +} + +void verifyNostrEvent(secp256k1_context *secpCtx, const NostrIndex::Event *flat, const tao::json::value &origJson) { + auto hash = nostrHash(origJson); + if (hash != sv(flat->id())) throw herr("bad event id"); + + bool valid = verifySig(secpCtx, from_hex(origJson.at("sig").get_string(), false), sv(flat->id()), sv(flat->pubkey())); + if (!valid) throw herr("bad signature"); +} + +void verifyNostrEventJsonSize(std::string_view jsonStr) { + if (jsonStr.size() > cfg().events__maxEventSize) throw herr("event too large: ", jsonStr.size()); +} + +void verifyEventTimestamp(const NostrIndex::Event *flat) { + auto now = hoytech::curr_time_s(); + auto ts = flat->created_at(); + + uint64_t earliest = now - (isEphemeralEvent(flat->kind()) ? 
cfg().events__rejectEphemeralEventsOlderThanSeconds : cfg().events__rejectEventsOlderThanSeconds); + uint64_t latest = now + cfg().events__rejectEventsNewerThanSeconds; + + if (ts < earliest) throw herr("created_at too early"); + if (ts > latest) throw herr("created_at too late"); +} + +void parseAndVerifyEvent(const tao::json::value &origJson, secp256k1_context *secpCtx, bool verifyMsg, bool verifyTime, std::string &flatStr, std::string &jsonStr) { + flatStr = nostrJsonToFlat(origJson); + auto *flat = flatbuffers::GetRoot<NostrIndex::Event>(flatStr.data()); + if (verifyTime) verifyEventTimestamp(flat); + if (verifyMsg) verifyNostrEvent(secpCtx, flat, origJson); + + // Build new object to remove unknown top-level fields from json + jsonStr = tao::json::to_string(tao::json::value({ + { "content", &origJson.at("content") }, + { "created_at", &origJson.at("created_at") }, + { "id", &origJson.at("id") }, + { "kind", &origJson.at("kind") }, + { "pubkey", &origJson.at("pubkey") }, + { "sig", &origJson.at("sig") }, + { "tags", &origJson.at("tags") }, + })); + + if (verifyMsg) verifyNostrEventJsonSize(jsonStr); +} + + + + + + +std::optional<defaultDb::environment::View_Event> lookupEventById(lmdb::txn &txn, std::string_view id) { + std::optional<defaultDb::environment::View_Event> output; + + env.generic_foreachFull(txn, env.dbi_Event__id, makeKey_StringUint64(id, 0), lmdb::to_sv<uint64_t>(0), [&](auto k, auto v) { + if (k.starts_with(id)) output = env.lookup_Event(txn, lmdb::from_sv<uint64_t>(v)); + return false; + }); + + return output; +} + +uint64_t getMostRecentEventId(lmdb::txn &txn) { + uint64_t output = 0; + + env.foreach_Event(txn, [&](auto &ev){ + output = ev.primaryKeyId; + return false; + }, true); + + return output; +} + +std::string_view getEventJson(lmdb::txn &txn, uint64_t quadId) { + std::string_view raw; + bool found = env.dbiQuadrable_nodesLeaf.get(txn, lmdb::to_sv(quadId), raw); + if (!found) throw herr("couldn't find leaf node in quadrable, corrupted DB?"); + return raw.substr(8 + 32 + 32); +} + + + +void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, 
std::deque<EventToWrite> &evs) { + auto changes = qdb.change(); + + std::vector<uint64_t> eventIdsToDelete; + + for (auto &ev : evs) { + const NostrIndex::Event *flat = flatbuffers::GetRoot<NostrIndex::Event>(ev.flatStr.data()); + + if (lookupEventById(txn, sv(flat->id()))) { + ev.status = EventWriteStatus::Duplicate; + continue; + } + + if (env.lookup_Event__deletion(txn, std::string(sv(flat->id())) + std::string(sv(flat->pubkey())))) { + ev.status = EventWriteStatus::Deleted; + continue; + } + + if (isReplaceableEvent(flat->kind())) { + auto searchKey = makeKey_StringUint64Uint64(sv(flat->pubkey()), flat->kind(), MAX_U64); + uint64_t otherEventId = 0; + + env.generic_foreachFull(txn, env.dbi_Event__pubkeyKind, searchKey, lmdb::to_sv(MAX_U64), [&](auto k, auto v) { + ParsedKey_StringUint64Uint64 parsedKey(k); + if (parsedKey.s == sv(flat->pubkey()) && parsedKey.n1 == flat->kind()) { + if (parsedKey.n2 < flat->created_at()) { + otherEventId = lmdb::from_sv<uint64_t>(v); + } else { + ev.status = EventWriteStatus::Replaced; + } + } + return false; + }, true); + + if (otherEventId) { + auto otherEv = env.lookup_Event(txn, otherEventId); + if (!otherEv) throw herr("missing event from index, corrupt DB?"); + changes.del(flatEventToQuadrableKey(otherEv->flat_nested())); + eventIdsToDelete.push_back(otherEventId); + } + } + + if (flat->kind() == 5) { + // Deletion event, delete all referenced events + for (const auto &tagPair : *(flat->tags())) { + if (tagPair->key() == 'e') { + auto otherEv = lookupEventById(txn, sv(tagPair->val())); + if (otherEv && sv(otherEv->flat_nested()->pubkey()) == sv(flat->pubkey())) { + LI << "Deleting event. 
id=" << to_hex(sv(tagPair->val())); + changes.del(flatEventToQuadrableKey(otherEv->flat_nested())); + eventIdsToDelete.push_back(otherEv->primaryKeyId); + } + } + } + + if (ev.status == EventWriteStatus::Pending) { + changes.put(flatEventToQuadrableKey(flat), ev.jsonStr, &ev.nodeId); + } + } + + changes.apply(txn); + + for (auto eventId : eventIdsToDelete) { + env.delete_Event(txn, eventId); + } + + for (auto &ev : evs) { + if (ev.status == EventWriteStatus::Pending) { + env.insert_Event(txn, ev.nodeId, ev.receivedAt, ev.flatStr); + ev.status = EventWriteStatus::Written; + } + } +} diff --git a/src/events.h b/src/events.h new file mode 100644 index 0000000..0b04108 --- /dev/null +++ b/src/events.h @@ -0,0 +1,80 @@ +#pragma once + +#include <secp256k1.h> +#include <secp256k1_schnorrsig.h> + +#include "golpe.h" + +#include "constants.h" + + + + +inline bool isReplaceableEvent(uint64_t kind) { + return ( + kind == 0 || + kind == 3 || + kind == 41 || + (kind >= 10'000 && kind < 20'000) + ); +} + +inline bool isEphemeralEvent(uint64_t kind) { + return ( + (kind >= 20'000 && kind < 30'000) + ); +} + + + + +std::string nostrJsonToFlat(const tao::json::value &v); +std::string nostrHash(const tao::json::value &origJson); + +bool verifySig(secp256k1_context* ctx, std::string_view sig, std::string_view hash, std::string_view pubkey); +void verifyNostrEvent(secp256k1_context *secpCtx, const NostrIndex::Event *flat, const tao::json::value &origJson); +void verifyNostrEventJsonSize(std::string_view jsonStr); +void verifyEventTimestamp(const NostrIndex::Event *flat); + +void parseAndVerifyEvent(const tao::json::value &origJson, secp256k1_context *secpCtx, bool verifyMsg, bool verifyTime, std::string &flatStr, std::string &jsonStr); + + +// Does not do verification! 
+inline const NostrIndex::Event *flatStrToFlatEvent(std::string_view flatStr) { + return flatbuffers::GetRoot<NostrIndex::Event>(flatStr.data()); +} + + +std::optional<defaultDb::environment::View_Event> lookupEventById(lmdb::txn &txn, std::string_view id); +uint64_t getMostRecentEventId(lmdb::txn &txn); +std::string_view getEventJson(lmdb::txn &txn, uint64_t quadId); + +inline quadrable::Key flatEventToQuadrableKey(const NostrIndex::Event *flat) { + return quadrable::Key::fromIntegerAndHash(flat->created_at(), sv(flat->id()).substr(0, 23)); +} + + + + + + +enum class EventWriteStatus { + Pending, + Written, + Duplicate, + Replaced, + Deleted, +}; + + +struct EventToWrite { + std::string flatStr; + std::string jsonStr; + uint64_t receivedAt; + void *userData = nullptr; + uint64_t nodeId = 0; + EventWriteStatus status = EventWriteStatus::Pending; +}; + + +void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::deque<EventToWrite> &evs); diff --git a/src/filters.h b/src/filters.h new file mode 100644 index 0000000..d994a01 --- /dev/null +++ b/src/filters.h @@ -0,0 +1,248 @@ +#pragma once + +#include "golpe.h" + +#include "constants.h" + + +struct FilterSetBytes { + struct Item { + uint16_t offset; + uint8_t size; + uint8_t firstByte; + }; + + std::vector<Item> items; + std::string buf; + + FilterSetBytes() {} + + // Sizes are post-hex decode + + void init(const tao::json::value &arrHex, bool hexDecode, size_t minSize, size_t maxSize) { + std::vector<std::string> arr; + + uint64_t totalSize = 0; + + for (const auto &i : arrHex.get_array()) { + arr.emplace_back(hexDecode ? 
from_hex(i.get_string(), false) : i.get_string()); + size_t itemSize = arr.back().size(); + if (itemSize < minSize) throw herr("filter item too small"); + if (itemSize > maxSize) throw herr("filter item too large"); + totalSize += itemSize; + } + + if (arr.size() == 0) throw herr("empty filter item"); + + std::sort(arr.begin(), arr.end()); + + for (const auto &item : arr) { + if (items.size() > 0 && item.starts_with(at(items.size() - 1))) continue; // remove duplicates and redundant prefixes + items.emplace_back(Item{ (uint16_t)buf.size(), (uint8_t)item.size(), (uint8_t)item[0] }); + buf += item; + } + + if (buf.size() > 65535) throw herr("total filter items too large"); + } + + std::string at(size_t n) const { + if (n >= items.size()) throw("FilterSetBytes access out of bounds"); + auto &item = items[n]; + return buf.substr(item.offset, item.size); + } + + size_t size() const { + return items.size(); + } + + bool doesMatch(std::string_view candidate) const { + if (candidate.size() == 0) throw herr("invalid candidate"); + + // Binary search for upper-bound: https://en.cppreference.com/w/cpp/algorithm/upper_bound + + ssize_t first = 0, last = items.size(), curr; + ssize_t count = last - first, step; + + while (count > 0) { + curr = first; + step = count / 2; + curr += step; + + bool comp = (uint8_t)candidate[0] != items[curr].firstByte + ? 
(uint8_t)candidate[0] < items[curr].firstByte + : candidate < std::string_view(buf.data() + items[curr].offset, items[curr].size); + + if (!comp) { + first = ++curr; + count -= step + 1; + } else { + count = step; + } + } + + if (first == 0) return false; + if (candidate.starts_with(std::string_view(buf.data() + items[first - 1].offset, items[first - 1].size))) return true; + + return false; + } +}; + +struct FilterSetUint { + std::vector<uint64_t> items; + + FilterSetUint() {} + + void init(const tao::json::value &arr) { + for (const auto &i : arr.get_array()) { + items.push_back(i.get_unsigned()); + } + + if (items.size() == 0) throw herr("empty filter item"); + + std::sort(items.begin(), items.end()); + + items.erase(std::unique(items.begin(), items.end()), items.end()); // remove duplicates + } + + uint64_t at(size_t n) const { + if (n >= items.size()) throw("FilterSetBytes access out of bounds"); + return items[n]; + } + + size_t size() const { + return items.size(); + } + + bool doesMatch(uint64_t candidate) const { + return std::binary_search(items.begin(), items.end(), candidate); + } +}; + +struct NostrFilter { + FilterSetBytes ids; + FilterSetBytes authors; + FilterSetUint kinds; + std::map<char, FilterSetBytes> tags; + + uint64_t since = 0; + uint64_t until = MAX_U64; + uint64_t limit = MAX_U64; + bool indexOnlyScans = false; + + explicit NostrFilter(const tao::json::value &filterObj) { + uint64_t numMajorFields = 0; + + for (const auto &[k, v] : filterObj.get_object()) { + if (k == "ids") { + ids.init(v, true, 1, 32); + numMajorFields++; + } else if (k == "authors") { + authors.init(v, true, 1, 32); + numMajorFields++; + } else if (k == "kinds") { + kinds.init(v); + numMajorFields++; + } else if (k.starts_with('#')) { + numMajorFields++; + if (k.size() == 2) { + char tag = k[1]; + auto [it, _] = tags.emplace(tag, FilterSetBytes{}); + + if (tag == 'p' || tag == 'e') { + it->second.init(v, true, 32, 32); + } else { + it->second.init(v, false, 1, cfg().events__maxTagValSize); + } + } 
else { + throw herr("unindexed tag filter"); + } + } else if (k == "since") { + since = v.get_unsigned(); + } else if (k == "until") { + until = v.get_unsigned(); + } else if (k == "limit") { + limit = v.get_unsigned(); + } else { + throw herr("unrecognised filter item"); + } + } + + if (tags.size() > 2) throw herr("too many tags in filter"); // O(N^2) in matching, just prohibit it + + if (limit > cfg().relay__maxFilterLimit) limit = cfg().relay__maxFilterLimit; + + indexOnlyScans = numMajorFields <= 1; + // FIXME: pubkeyKind scan could be serviced index-only too + } + + bool doesMatchTimes(uint64_t created) const { + if (created < since) return false; + if (created > until) return false; + return true; + } + + bool doesMatch(const NostrIndex::Event *ev) const { + if (!doesMatchTimes(ev->created_at())) return false; + + if (ids.size() && !ids.doesMatch(sv(ev->id()))) return false; + if (authors.size() && !authors.doesMatch(sv(ev->pubkey()))) return false; + if (kinds.size() && !kinds.doesMatch(ev->kind())) return false; + + for (const auto &[tag, filt] : tags) { + bool foundMatch = false; + + for (const auto &tagPair : *(ev->tags())) { + auto eventTag = tagPair->key(); + if (eventTag == tag && filt.doesMatch(sv(tagPair->val()))) { + foundMatch = true; + break; + } + } + + if (!foundMatch) return false; + } + + return true; + } +}; + +struct NostrFilterGroup { + std::vector<NostrFilter> filters; + + // Note that this expects the full array, so the first two items are "REQ" and the subId + NostrFilterGroup(const tao::json::value &req) { + const auto &arr = req.get_array(); + if (arr.size() < 3) throw herr("too small"); + + for (size_t i = 2; i < arr.size(); i++) { + filters.emplace_back(arr[i]); + } + } + + // Hacky! 
Deserves a refactor + static NostrFilterGroup unwrapped(tao::json::value filter) { + if (!filter.is_array()) { + filter = tao::json::value::array({ filter }); + } + + tao::json::value pretendReqQuery = tao::json::value::array({ "REQ", "junkSub" }); + + for (auto &e : filter.get_array()) { + pretendReqQuery.push_back(e); + } + + return NostrFilterGroup(pretendReqQuery); + } + + bool doesMatch(const NostrIndex::Event *ev) const { + for (const auto &f : filters) { + if (f.doesMatch(ev)) return true; + } + + return false; + } + + size_t size() const { + return filters.size(); + } +}; diff --git a/src/yesstr.h b/src/yesstr.h new file mode 100644 index 0000000..72570a1 --- /dev/null +++ b/src/yesstr.h @@ -0,0 +1,29 @@ +#pragma once + +#include "golpe.h" + + +inline void verifyYesstrRequest(std::string_view msg) { + if (!msg.starts_with("Y")) throw herr("invalid yesstr magic char"); + msg = msg.substr(1); + auto verifier = flatbuffers::Verifier(reinterpret_cast<const uint8_t*>(msg.data()), msg.size()); + bool ok = verifier.VerifyBuffer<Yesstr::Request>(nullptr); + if (!ok) throw herr("yesstr request verification failed"); +} + +inline void verifyYesstrResponse(std::string_view msg) { + if (!msg.starts_with("Y")) throw herr("invalid yesstr magic char"); + msg = msg.substr(1); + auto verifier = flatbuffers::Verifier(reinterpret_cast<const uint8_t*>(msg.data()), msg.size()); + bool ok = verifier.VerifyBuffer<Yesstr::Response>(nullptr); + if (!ok) throw herr("yesstr response verification failed"); +} + + +inline const Yesstr::Request *parseYesstrRequest(std::string_view msg) { + return flatbuffers::GetRoot<Yesstr::Request>(msg.substr(1).data()); +} + +inline const Yesstr::Response *parseYesstrResponse(std::string_view msg) { + return flatbuffers::GetRoot<Yesstr::Response>(msg.substr(1).data()); +} diff --git a/strfry-db/.keep b/strfry-db/.keep new file mode 100644 index 0000000..e69de29 diff --git a/strfry.conf b/strfry.conf new file mode 100644 index 0000000..78a6884 --- /dev/null +++ b/strfry.conf @@ -0,0 +1,5 @@ +db = "./strfry-db/" + +relay { + port = 7777 +} diff --git 
a/test/dumbFilter.pl b/test/dumbFilter.pl new file mode 100644 index 0000000..43243b3 --- /dev/null +++ b/test/dumbFilter.pl @@ -0,0 +1,110 @@ +#!/usr/bin/env perl + +use JSON::XS; + +use strict; + +binmode(STDOUT, ":utf8"); + +my $filterJson = shift || die "need filter"; +my $filter = decode_json($filterJson); + +while(<STDIN>) { + my $ev = decode_json($_); + + if (doesMatch($ev, $filter)) { + print $_; + } +} + + +sub doesMatch { + my $ev = shift; + my $filter = shift; + + $filter = [$filter] if ref $filter eq 'HASH'; + + foreach my $singleFilter (@$filter) { + return 1 if doesMatchSingle($ev, $singleFilter); + } + + return 0; +} + +sub doesMatchSingle { + my $ev = shift; + my $filter = shift; + + if (defined $filter->{since}) { + return 0 if $ev->{created_at} < $filter->{since}; + } + + if (defined $filter->{until}) { + return 0 if $ev->{created_at} > $filter->{until}; + } + + if ($filter->{ids}) { + my $found; + foreach my $id (@{ $filter->{ids} }) { + if (startsWith($ev->{id}, $id)) { + $found = 1; + last; + } + } + return 0 if !$found; + } + + if ($filter->{authors}) { + my $found; + foreach my $author (@{ $filter->{authors} }) { + if (startsWith($ev->{pubkey}, $author)) { + $found = 1; + last; + } + } + return 0 if !$found; + } + + if ($filter->{kinds}) { + my $found; + foreach my $kind (@{ $filter->{kinds} }) { + if ($ev->{kind} == $kind) { + $found = 1; + last; + } + } + return 0 if !$found; + } + + if ($filter->{'#e'}) { + my $found; + foreach my $search (@{ $filter->{'#e'} }) { + foreach my $tag (@{ $ev->{tags} }) { + if ($tag->[0] eq 'e' && $tag->[1] eq $search) { + $found = 1; + last; + } + } + } + return 0 if !$found; + } + + if ($filter->{'#p'}) { + my $found; + foreach my $search (@{ $filter->{'#p'} }) { + foreach my $tag (@{ $ev->{tags} }) { + if ($tag->[0] eq 'p' && $tag->[1] eq $search) { + $found = 1; + last; + } + } + } + return 0 if !$found; + } + + return 1; +} + +sub startsWith { + return rindex($_[0], $_[1], 0) == 0; +} diff --git 
a/test/filterFuzzTest.pl b/test/filterFuzzTest.pl new file mode 100644 index 0000000..f6dff43 --- /dev/null +++ b/test/filterFuzzTest.pl @@ -0,0 +1,242 @@ +use strict; +use Data::Dumper; +use JSON::XS; +use IPC::Open2; + + +# ./strfry export|perl -MJSON::XS -nE '$z=decode_json($_); for my $t (@{$z->{tags}}) { say $t->[1] if $t->[0] eq "e"}'|sort|uniq -c|sort -rn|head -50|perl -nE '/\d+\s+(\w+)/ && say $1' + +# Don't forget to set 'maxFilterLimit = 1000000000000' in config + + +my $kinds = [qw/1 7 4 42 0 30 3 6/]; + +my $pubkeys = [qw{ +887645fef0ce0c3c1218d2f5d8e6132a19304cdc57cd20281d082f38cfea0072 +f4161c88558700d23af18d8a6386eb7d7fed769048e1297811dcc34e86858fb2 +32e1827635450ebb3c5a7d12c1f8e7b2b514439ac10a67eef3d9fd9c5c68e245 +3b57518d02e6acfd5eb7198530b2e351e5a52278fb2499d14b66db2b5791c512 +3235036bd0957dfb27ccda02d452d7c763be40c91a1ac082ba6983b25238388c +2183e94758481d0f124fbd93c56ccaa45e7e545ceeb8d52848f98253f497b975 +2d1ac20dbb78936ca088c7824683e7b59fb2774ac86a63c72480514d8cecc0aa +b2d670de53b27691c0c3400225b65c35a26d06093bcc41f48ffc71e0907f9d4a +3104f98515b3aa147d55d9c2951e0f953b829d8724381d8f0d824125d7727634 +5c10ed0678805156d39ef1ef6d46110fe1e7e590ae04986ccf48ba1299cb53e2 +c2b373077c70c490e540ce029a346949b893964dd9e06da1d2facbc49d6ffe5c +47f7163bed3bdb80dc8b514693293d588710607018855cb5a53f4bb6ddba8377 +9ec7a778167afb1d30c4833de9322da0c08ba71a69e1911d5578d3144bb56437 +6da123ce3bb5245484a84ad9f57c32f4da01f4d67f9905c530ca2e9691ea68de +f19c4407f08fc3e9b2957f290272f6d8c2ebae5854704a03f5900779b8aaa664 +543210b5f6c3071c3135d850449f8bf91efffb5ed1153e5fcbb2d95b79262b57 +00000000827ffaa94bfea288c3dfce4422c794fbb96625b6b31e9049f729d700 +42b3f07844b2ad2717078abb47019369cee2aeae79469f8313ede9d75806cf61 +4b5520fd1bdcb6f11a8847e2c980f07ba873488a097467186ffeb68f955b9273 +552d5a1dcdc23eee687934791ae6da53e36e038924b314729cb7641745e78563 +}]; + +my $ids = [qw{ +25e5c82273a271cb1a840d0060391a0bf4965cafeb029d5ab55350b418953fbb 
+ca178c4ecea83fa7f7b04345be4587cf03c7d8775f50014e31caf6869a626354 +4211fc228be5af10923f56e60b1b11b8e63bf0ac7dbd3e1e3d767392fdaed4a4 +f06a690997a1b7d8283c90a7224eb8b7fe96b7c3d3d8cc7b2e7f743532c02b42 +2b48218edd23e88fd33ec23d6d91fd7203a26497d74d4ba54cbae91e3b6e169e +6c99281bf6ff2715fddcdd1d255db5b93a852930acea28a09374d9de868dcfab +936041f4a0b0625e08982e98b85795396b391400750638698bd71269271f5bdd +7ac6abd15d03736ef883716ac152ad8d066a748fc8e048b542decea52496c12b +9ba8717d61d9dfdad7d7b260ae33566241e3a55ecd26c2dcb944b47b1ef21eb7 +3561b3054737b1b126e607d574f230ca17ababe6ef803070e8967c3de607a620 +59c23027c484936ccbf408369fc8105467b15e142213737631fcf3518017e168 +b1791d7fc9ae3d38966568c257ffb3a02cbf8394cdb4805bc70f64fc3c0b6879 +52cab2e3e504ad6447d284b85b5cc601ca0613b151641e77facfec851c2ca816 +a382aed3ba436a7d6c98ec41e2477b370e0332689cdb04b09b8dd8a95d1210f9 +453c1d471f6cfd6d99fbb344e61229f9a0a1d8c96764b5ac1a8f0aa785e293a5 +020b587a1627e42d4b94f14b29d6cd9328635712b1e75daed9c178815d6b2f5f +c773cfe264b3035ebfbbc2b5c874a1859f671320ab24e09bd56559ec4e48e903 +fbd74e99301046798d0dbbef6ff3e14ea1305884eeb09068f84a00361501a0d4 +83ee9be878407dc4a9f8a6cfff54227a66745532a78b56443e36b4c3c3711189 +342060554ca30a9792f6e6959675ae734aed02c23e35037d2a0f72ac6316e83d +01846005bb00245e06bdb9ab4f85f0b0624ac408816bf1c0c691ebb6dcaba23e +846b8aa598d81379f7f36f5a94165d1a3b5e4cc080f3badd681e75aa03e8a806 +08470808369f03c2157607ffcb441f91305d207f249aae4c08373bdbba2431ea +1f304d32f1db468ca84fb15b7182a38a8511a991ece50920683efc23461550c7 +81888882d8183843299dc6625746c69d5cc37281c1c62aa69d63aa6e9f197a31 +ff693a532e4f2bd3ae7657b12174d338ca906fc9bea18910b06a795c4552d4c5 +2b7a291d69c07f5523837616634a9f182ab2833f2a3ce21312b8c400963f366b +b0d766f01b1cc883a21c5dc2553f1a7246254d61f08760717413c9b570510f88 +032356b66ee1608f156c800f261b36aac254d49f895e2cd725f19bbdfeb8a8c5 +a6e1e4ea75050c57a814bb4f098d0690b3577cd84fbc0db74b0fd3e924db3071 +81ed7bbaeb3bbec4f3816605dfb45cad85dfd99931df266df8d018d65d874bbd 
+786485a61011ceed9e373abdf6485c5ca070e2bcc50c457b8f817cd275bcbf00 +00000ea184b1d9e3688ddbfd13d2f8bc0893ab73d8a5c539b85c7d168ec5423e +f25c0cddcca28603db780fdefceeebf1cc3b3ce69f48bb0dcbb4c1d0bfd68d6c +b7436420d5ce4521ba3130e522414eb146814dae74434108688dcd225d5db5ea +d4c9bc1ca5dff4a371f8f10f24013211c67e789d353913f28ae24be13f267c58 +26b2d82df41d68da1b684ac99b4adadc2d272d49590155850789251cc3c80f84 +6717375662b966120041dbe5cad98d6704861d57589a40ad7cfc5e250d653511 +ec25b9c7ff8fa8ccdc7d2e3bfa06df82448a88c40212c6d19bce4a6f747b736b +508874b1c08c9c57f174c5101eda831362f30cbc4147e96f5b9b5338b7c3654d +07c9ee7d5704d544cec36f7888b3fa6183cee744e598f603fde5e06be8f88c81 +9a8e1f60401d277c36986fad81234d14a655463ecdeae74b2f89754bc07a109a +ea5d104277e42b35ca260fa7006a119c4d2b1404d5c53c94d67973c6668acf5c +4e79025204e0860dc601a2d7147005f6173d7ae7a9cd782da71e6dbab9d22b37 +d6c246cc94a9348bdf4f71e867db235d2ba457007b669984003d845c4dd7237f +0333ed329f08aadabb62d099809ab0fa05de0c4bcc2c5aeea9244456ae607e71 +47b2a3875e37296ac5b872f94bda9a9dab52a71e1d161b861b5f7691bca2189f +6c09c4dca9a3466f22305084639f685faf2d5d62765a57064a35f3d20fe70559 +c1e5e04d92d9bd20701bff4cbdac1cdc317d405035883b7adcf9a6a5308d0f54 +3a15cb7cf951de54a23585ca003c96ca9a7c49fbf8e436575ff9bb710af301f0 +}]; + +sub genRandomFilterGroup { + my $numFilters = (rand()*10)+1; + + my @filters; + for (1..$numFilters) { + my $f = {}; + + while (!keys %$f) { + if (rand() < .15) { + for (1..(rand()*10) + 1) { + push @{$f->{ids}}, randPrefix($ids->[int(rand() * @$ids)]); + } + } + + if (rand() < .3) { + for (1..(rand()*5) + 1) { + push @{$f->{authors}}, randPrefix($pubkeys->[int(rand() * @$pubkeys)]); + } + } + + if (rand() < .2) { + for (1..(rand()*5) + 1) { + push @{$f->{kinds}}, 0+$kinds->[int(rand() * @$kinds)]; + } + } + + if (rand() < .2) { + for (1..(rand()*10) + 1) { + push @{$f->{'#e'}}, $ids->[int(rand() * @$ids)]; + } + } + + if (rand() < .2) { + for (1..(rand()*5) + 1) { + push @{$f->{'#p'}}, $pubkeys->[int(rand() * @$pubkeys)]; + } 
+ } + } + + if (rand() < .2) { + $f->{since} = 1640300802 + int(rand() * 86400*365); + } + + if (rand() < .2) { + $f->{until} = 1640300802 + int(rand() * 86400*365); + } + + if ($f->{since} && $f->{until} && $f->{since} > $f->{until}) { + delete $f->{since}; + delete $f->{until}; + } + + push @filters, $f; + } + + return \@filters; +} + +sub randPrefix { + my $v = shift; + return $v if rand() < .5; + return substr($v, 0, (int(rand() * 20) + 1) * 2); +} + +sub genRandomMonitorCmds { + my $nextConnId = 1; + my @out; + + my $interestFg = genRandomFilterGroup(); + + my $iters = int(rand() * 1000) + 100; + + for my $i (1..$iters) { + if ($i == int($iters / 2)) { + push @out, ["sub", 1000000, "mysub", $interestFg]; + push @out, ["interest", 1000000, "mysub"]; + } elsif (rand() < .9) { + push @out, ["sub", $nextConnId++, "s" . int(rand() * 4), genRandomFilterGroup()]; + } elsif (rand() < .75) { + push @out, ["removeSub", int(rand() * $nextConnId) + 1, "s" . int(rand() * 4)]; + } else { + push @out, ["closeConn", int(rand() * $nextConnId) + 1]; + } + } + + return (\@out, $interestFg); +} + + +sub testScan { + my $fg = shift; + my $fge = encode_json($fg); + + #print JSON::XS->new->pretty(1)->encode($fg); + print "$fge\n"; + + my $resA = `./strfry --config test/strfry.conf export 2>/dev/null | perl test/dumbFilter.pl '$fge' | jq -r .pubkey | sort | sha256sum`; + my $resB = `./strfry --config test/strfry.conf scan '$fge' | jq -r .pubkey | sort | sha256sum`; + + print "$resA\n$resB\n"; + + if ($resA ne $resB) { + print STDERR "$fge\n"; + die "MISMATCH"; + } + + print "-----------MATCH OK-------------\n\n\n"; +} + + + +srand($ENV{SEED} || 0); + +my $cmd = shift; + +if ($cmd eq 'scan') { + while (1) { + my $fg = genRandomFilterGroup(); + testScan($fg); + } +} elsif ($cmd eq 'monitor') { + while (1) { + my ($monCmds, $interestFg) = genRandomMonitorCmds(); + + my $fge = encode_json($interestFg); + print "filt: $fge\n\n"; + + print "DOING MONS\n"; + my $pid = open2(my $outfile, my 
$infile, './strfry --config test/strfry.conf monitor | jq -r .pubkey | sort | sha256sum'); + for my $c (@$monCmds) { print $infile encode_json($c), "\n"; } + close($infile); + + my $resA = <$outfile>; + + waitpid($pid, 0); + my $child_exit_status = $? >> 8; + die "monitor cmd died" if $child_exit_status; + + print "DOING SCAN\n"; + my $resB = `./strfry --config test/strfry.conf scan '$fge' 2>/dev/null | jq -r .pubkey | sort | sha256sum`; + + print "$resA\n$resB\n"; + + if ($resA eq $resB) { + print "-----------MATCH OK-------------\n\n\n"; + } else { + print STDERR "$fge\n"; + die "MISMATCH"; + } + } +} else { + die "unknown cmd: $cmd"; +} diff --git a/test/strfry.conf b/test/strfry.conf new file mode 100644 index 0000000..1b4b462 --- /dev/null +++ b/test/strfry.conf @@ -0,0 +1,6 @@ +db = "./strfry-db/" + +relay { + port = 7777 + maxFilterLimit = 1000000000000 +}